ref: 5ebc8febdc25ea79a89ac01fef21f84fd53b3143
parent: 55c6a74bd4f228e48d56de200f25154eb733fc40
author: Jingning Han <[email protected]>
date: Thu Jul 23 12:35:44 EDT 2015
Refactor vp9_idct.h file Separate the common coefficient constant into vpx_dsp/txfm_common.h. Move the SSE2 macro definitions to vpx_dsp/x86/txfm_common_sse2.h. This clears the use case of vp9_idct.h in vpx_dsp folder. Change-Id: I319735a2abf42888e5080ac14cfbcde34be7b121
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -14,6 +14,7 @@
#include <assert.h>
#include "./vpx_config.h"
+#include "vpx_dsp/txfm_common.h"
#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_enums.h"
@@ -21,68 +22,6 @@
#ifdef __cplusplus
extern "C" {
#endif
-
-// Constants and Macros used by all idct/dct functions
-#define DCT_CONST_BITS 14
-#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
-
-#define UNIT_QUANT_SHIFT 2
-#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)
-
-#define pair_set_epi16(a, b) \
- _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
- (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a))
-
-#define dual_set_epi16(a, b) \
- _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \
- (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a))
-
-#define octa_set_epi16(a, b, c, d, e, f, g, h) \
- _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \
- (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h))
-
-// Constants:
-// for (int i = 1; i< 32; ++i)
-// printf("static const int cospi_%d_64 = %.0f;\n", i,
-// round(16384 * cos(i*M_PI/64)));
-// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
-static const tran_high_t cospi_1_64 = 16364;
-static const tran_high_t cospi_2_64 = 16305;
-static const tran_high_t cospi_3_64 = 16207;
-static const tran_high_t cospi_4_64 = 16069;
-static const tran_high_t cospi_5_64 = 15893;
-static const tran_high_t cospi_6_64 = 15679;
-static const tran_high_t cospi_7_64 = 15426;
-static const tran_high_t cospi_8_64 = 15137;
-static const tran_high_t cospi_9_64 = 14811;
-static const tran_high_t cospi_10_64 = 14449;
-static const tran_high_t cospi_11_64 = 14053;
-static const tran_high_t cospi_12_64 = 13623;
-static const tran_high_t cospi_13_64 = 13160;
-static const tran_high_t cospi_14_64 = 12665;
-static const tran_high_t cospi_15_64 = 12140;
-static const tran_high_t cospi_16_64 = 11585;
-static const tran_high_t cospi_17_64 = 11003;
-static const tran_high_t cospi_18_64 = 10394;
-static const tran_high_t cospi_19_64 = 9760;
-static const tran_high_t cospi_20_64 = 9102;
-static const tran_high_t cospi_21_64 = 8423;
-static const tran_high_t cospi_22_64 = 7723;
-static const tran_high_t cospi_23_64 = 7005;
-static const tran_high_t cospi_24_64 = 6270;
-static const tran_high_t cospi_25_64 = 5520;
-static const tran_high_t cospi_26_64 = 4756;
-static const tran_high_t cospi_27_64 = 3981;
-static const tran_high_t cospi_28_64 = 3196;
-static const tran_high_t cospi_29_64 = 2404;
-static const tran_high_t cospi_30_64 = 1606;
-static const tran_high_t cospi_31_64 = 804;
-
-// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
-static const tran_high_t sinpi_1_9 = 5283;
-static const tran_high_t sinpi_2_9 = 9929;
-static const tran_high_t sinpi_3_9 = 13377;
-static const tran_high_t sinpi_4_9 = 15212;
static INLINE tran_low_t check_range(tran_high_t input) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -9,9 +9,9 @@
*/
#include "./vp9_rtcd.h"
-#include "vpx_ports/mem.h"
#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
-#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
+#include "vpx_ports/mem.h"
#define RECON_AND_STORE4X4(dest, in_x) \
{ \
--- a/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
+++ b/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
@@ -11,6 +11,8 @@
#include <emmintrin.h> // SSE2
#include "vp9/encoder/vp9_dct.h"
+#include "vpx_dsp/txfm_common.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
#if DCT_HIGH_BIT_DEPTH
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -13,8 +13,9 @@
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
-#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vp9/encoder/x86/vp9_dct_sse2.h"
+#include "vpx_dsp/txfm_common.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
--- a/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -18,6 +18,7 @@
#include "./vp9_rtcd.h"
#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
int16_t* coeff_ptr, intptr_t n_coeffs,
--- a/vpx_dsp/arm/fwd_txfm_neon.c
+++ b/vpx_dsp/arm/fwd_txfm_neon.c
@@ -9,8 +9,9 @@
*/
#include <arm_neon.h>
+
#include "./vpx_config.h"
-#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/txfm_common.h"
void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
int i;
--- a/vpx_dsp/fwd_txfm.h
+++ b/vpx_dsp/fwd_txfm.h
@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/txfm_common.h"
static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
--- a/vpx_dsp/mips/fwd_txfm_msa.h
+++ b/vpx_dsp/mips/fwd_txfm_msa.h
@@ -12,7 +12,7 @@
#define VPX_DSP_MIPS_FWD_TXFM_MSA_H_
#include "vpx_dsp/mips/txfm_macros_msa.h"
-#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/txfm_common.h"
#define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) { \
v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m; \
--- /dev/null
+++ b/vpx_dsp/txfm_common.h
@@ -1,0 +1,66 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_TXFM_COMMON_H_
+#define VPX_DSP_TXFM_COMMON_H_
+
+#include "vpx_dsp/vpx_dsp_common.h"
+
+// Constants and Macros used by all idct/dct functions
+#define DCT_CONST_BITS 14
+#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
+
+#define UNIT_QUANT_SHIFT 2
+#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)
+
+// Constants:
+// for (int i = 1; i< 32; ++i)
+// printf("static const int cospi_%d_64 = %.0f;\n", i,
+// round(16384 * cos(i*M_PI/64)));
+// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
+static const tran_high_t cospi_1_64 = 16364;
+static const tran_high_t cospi_2_64 = 16305;
+static const tran_high_t cospi_3_64 = 16207;
+static const tran_high_t cospi_4_64 = 16069;
+static const tran_high_t cospi_5_64 = 15893;
+static const tran_high_t cospi_6_64 = 15679;
+static const tran_high_t cospi_7_64 = 15426;
+static const tran_high_t cospi_8_64 = 15137;
+static const tran_high_t cospi_9_64 = 14811;
+static const tran_high_t cospi_10_64 = 14449;
+static const tran_high_t cospi_11_64 = 14053;
+static const tran_high_t cospi_12_64 = 13623;
+static const tran_high_t cospi_13_64 = 13160;
+static const tran_high_t cospi_14_64 = 12665;
+static const tran_high_t cospi_15_64 = 12140;
+static const tran_high_t cospi_16_64 = 11585;
+static const tran_high_t cospi_17_64 = 11003;
+static const tran_high_t cospi_18_64 = 10394;
+static const tran_high_t cospi_19_64 = 9760;
+static const tran_high_t cospi_20_64 = 9102;
+static const tran_high_t cospi_21_64 = 8423;
+static const tran_high_t cospi_22_64 = 7723;
+static const tran_high_t cospi_23_64 = 7005;
+static const tran_high_t cospi_24_64 = 6270;
+static const tran_high_t cospi_25_64 = 5520;
+static const tran_high_t cospi_26_64 = 4756;
+static const tran_high_t cospi_27_64 = 3981;
+static const tran_high_t cospi_28_64 = 3196;
+static const tran_high_t cospi_29_64 = 2404;
+static const tran_high_t cospi_30_64 = 1606;
+static const tran_high_t cospi_31_64 = 804;
+
+// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
+static const tran_high_t sinpi_1_9 = 5283;
+static const tran_high_t sinpi_2_9 = 9929;
+static const tran_high_t sinpi_3_9 = 13377;
+static const tran_high_t sinpi_4_9 = 15212;
+
+#endif // VPX_DSP_TXFM_COMMON_H_
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -61,6 +61,8 @@
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_loopfilter_sse2.c
endif # CONFIG_VP9_HIGHBITDEPTH
+DSP_SRCS-yes += txfm_common.h
+DSP_SRCS-$(HAVE_SSE2) += x86/txfm_common_sse2.h
DSP_SRCS-$(HAVE_MSA) += mips/txfm_macros_msa.h
# forward transform
ifeq ($(CONFIG_VP9_ENCODER),yes)
--- a/vpx_dsp/x86/fwd_txfm_impl_sse2.h
+++ b/vpx_dsp/x86/fwd_txfm_impl_sse2.h
@@ -11,8 +11,9 @@
#include <emmintrin.h> // SSE2
#include "./vpx_dsp_rtcd.h"
-#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vp9/encoder/x86/vp9_dct_sse2.h"
+#include "vpx_dsp/txfm_common.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
// TODO(jingning) The high bit-depth functions need rework for performance.
--- a/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
+++ b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
@@ -15,7 +15,6 @@
#include "vpx_ports/mem.h"
#if CONFIG_VP9_HIGHBITDEPTH
-// from vp9_idct.h: typedef int32_t tran_low_t;
void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr,
intptr_t count,
int skip_block,
--- /dev/null
+++ b/vpx_dsp/x86/txfm_common_sse2.h
@@ -1,0 +1,29 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_X86_TXFM_COMMON_SSE2_H_
+#define VPX_DSP_X86_TXFM_COMMON_SSE2_H_
+
+#include <emmintrin.h>
+#include "vpx/vpx_integer.h"
+
+#define pair_set_epi16(a, b) \
+ _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
+ (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a))
+
+#define dual_set_epi16(a, b) \
+ _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \
+ (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a))
+
+#define octa_set_epi16(a, b, c, d, e, f, g, h) \
+ _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \
+ (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h))
+
+#endif // VPX_DSP_X86_TXFM_COMMON_SSE2_H_