shithub: libvpx

Download patch

ref: 5ebc8febdc25ea79a89ac01fef21f84fd53b3143
parent: 55c6a74bd4f228e48d56de200f25154eb733fc40
author: Jingning Han <[email protected]>
date: Thu Jul 23 12:35:44 EDT 2015

Refactor vp9_idct.h file

Separate the common coefficient constants into vpx_dsp/txfm_common.h.
Move the SSE2 macro definitions to vpx_dsp/x86/txfm_common_sse2.h.
This removes the uses of vp9_idct.h from the vpx_dsp folder.

Change-Id: I319735a2abf42888e5080ac14cfbcde34be7b121

--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -14,6 +14,7 @@
 #include <assert.h>
 
 #include "./vpx_config.h"
+#include "vpx_dsp/txfm_common.h"
 #include "vpx_ports/mem.h"
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_enums.h"
@@ -21,68 +22,6 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-
-// Constants and Macros used by all idct/dct functions
-#define DCT_CONST_BITS 14
-#define DCT_CONST_ROUNDING  (1 << (DCT_CONST_BITS - 1))
-
-#define UNIT_QUANT_SHIFT 2
-#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)
-
-#define pair_set_epi16(a, b) \
-  _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
-                (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a))
-
-#define dual_set_epi16(a, b) \
-  _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \
-                (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a))
-
-#define octa_set_epi16(a, b, c, d, e, f, g, h) \
-  _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \
-                 (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h))
-
-// Constants:
-//  for (int i = 1; i< 32; ++i)
-//    printf("static const int cospi_%d_64 = %.0f;\n", i,
-//           round(16384 * cos(i*M_PI/64)));
-// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
-static const tran_high_t cospi_1_64  = 16364;
-static const tran_high_t cospi_2_64  = 16305;
-static const tran_high_t cospi_3_64  = 16207;
-static const tran_high_t cospi_4_64  = 16069;
-static const tran_high_t cospi_5_64  = 15893;
-static const tran_high_t cospi_6_64  = 15679;
-static const tran_high_t cospi_7_64  = 15426;
-static const tran_high_t cospi_8_64  = 15137;
-static const tran_high_t cospi_9_64  = 14811;
-static const tran_high_t cospi_10_64 = 14449;
-static const tran_high_t cospi_11_64 = 14053;
-static const tran_high_t cospi_12_64 = 13623;
-static const tran_high_t cospi_13_64 = 13160;
-static const tran_high_t cospi_14_64 = 12665;
-static const tran_high_t cospi_15_64 = 12140;
-static const tran_high_t cospi_16_64 = 11585;
-static const tran_high_t cospi_17_64 = 11003;
-static const tran_high_t cospi_18_64 = 10394;
-static const tran_high_t cospi_19_64 = 9760;
-static const tran_high_t cospi_20_64 = 9102;
-static const tran_high_t cospi_21_64 = 8423;
-static const tran_high_t cospi_22_64 = 7723;
-static const tran_high_t cospi_23_64 = 7005;
-static const tran_high_t cospi_24_64 = 6270;
-static const tran_high_t cospi_25_64 = 5520;
-static const tran_high_t cospi_26_64 = 4756;
-static const tran_high_t cospi_27_64 = 3981;
-static const tran_high_t cospi_28_64 = 3196;
-static const tran_high_t cospi_29_64 = 2404;
-static const tran_high_t cospi_30_64 = 1606;
-static const tran_high_t cospi_31_64 = 804;
-
-//  16384 * sqrt(2) * sin(kPi/9) * 2 / 3
-static const tran_high_t sinpi_1_9 = 5283;
-static const tran_high_t sinpi_2_9 = 9929;
-static const tran_high_t sinpi_3_9 = 13377;
-static const tran_high_t sinpi_4_9 = 15212;
 
 static INLINE tran_low_t check_range(tran_high_t input) {
 #if CONFIG_COEFFICIENT_RANGE_CHECKING
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -9,9 +9,9 @@
  */
 
 #include "./vp9_rtcd.h"
-#include "vpx_ports/mem.h"
 #include "vp9/common/x86/vp9_idct_intrin_sse2.h"
-#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
+#include "vpx_ports/mem.h"
 
 #define RECON_AND_STORE4X4(dest, in_x) \
 {                                                     \
--- a/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
+++ b/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
@@ -11,6 +11,8 @@
 #include <emmintrin.h>  // SSE2
 
 #include "vp9/encoder/vp9_dct.h"
+#include "vpx_dsp/txfm_common.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
 #include "vpx_ports/mem.h"
 
 #if DCT_HIGH_BIT_DEPTH
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -13,8 +13,9 @@
 
 #include "./vp9_rtcd.h"
 #include "./vpx_dsp_rtcd.h"
-#include "vp9/common/vp9_idct.h"  // for cospi constants
 #include "vp9/encoder/x86/vp9_dct_sse2.h"
+#include "vpx_dsp/txfm_common.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
 #include "vpx_ports/mem.h"
 
 void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
--- a/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -18,6 +18,7 @@
 
 #include "./vp9_rtcd.h"
 #include "vp9/common/x86/vp9_idct_intrin_sse2.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
 
 void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
                              int16_t* coeff_ptr, intptr_t n_coeffs,
--- a/vpx_dsp/arm/fwd_txfm_neon.c
+++ b/vpx_dsp/arm/fwd_txfm_neon.c
@@ -9,8 +9,9 @@
  */
 
 #include <arm_neon.h>
+
 #include "./vpx_config.h"
-#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/txfm_common.h"
 
 void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
   int i;
--- a/vpx_dsp/fwd_txfm.h
+++ b/vpx_dsp/fwd_txfm.h
@@ -8,7 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/txfm_common.h"
 
 static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
   tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
--- a/vpx_dsp/mips/fwd_txfm_msa.h
+++ b/vpx_dsp/mips/fwd_txfm_msa.h
@@ -12,7 +12,7 @@
 #define VPX_DSP_MIPS_FWD_TXFM_MSA_H_
 
 #include "vpx_dsp/mips/txfm_macros_msa.h"
-#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/txfm_common.h"
 
 #define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) {     \
   v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m;                         \
--- /dev/null
+++ b/vpx_dsp/txfm_common.h
@@ -1,0 +1,66 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_TXFM_COMMON_H_
+#define VPX_DSP_TXFM_COMMON_H_
+
+#include "vpx_dsp/vpx_dsp_common.h"
+
+// Constants and Macros used by all idct/dct functions
+#define DCT_CONST_BITS 14
+#define DCT_CONST_ROUNDING  (1 << (DCT_CONST_BITS - 1))
+
+#define UNIT_QUANT_SHIFT 2
+#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)
+
+// Constants: cos(i*Pi/64) scaled by 16384 (1 << DCT_CONST_BITS), generated by:
+//  for (int i = 1; i < 32; ++i)
+//    printf("static const tran_high_t cospi_%d_64 = %.0f;\n", i,
+//           round(16384 * cos(i*M_PI/64)));
+// Note: sin(k*Pi/64) = cos((32-k)*Pi/64), so no separate sine table is needed.
+static const tran_high_t cospi_1_64  = 16364;
+static const tran_high_t cospi_2_64  = 16305;
+static const tran_high_t cospi_3_64  = 16207;
+static const tran_high_t cospi_4_64  = 16069;
+static const tran_high_t cospi_5_64  = 15893;
+static const tran_high_t cospi_6_64  = 15679;
+static const tran_high_t cospi_7_64  = 15426;
+static const tran_high_t cospi_8_64  = 15137;
+static const tran_high_t cospi_9_64  = 14811;
+static const tran_high_t cospi_10_64 = 14449;
+static const tran_high_t cospi_11_64 = 14053;
+static const tran_high_t cospi_12_64 = 13623;
+static const tran_high_t cospi_13_64 = 13160;
+static const tran_high_t cospi_14_64 = 12665;
+static const tran_high_t cospi_15_64 = 12140;
+static const tran_high_t cospi_16_64 = 11585;
+static const tran_high_t cospi_17_64 = 11003;
+static const tran_high_t cospi_18_64 = 10394;
+static const tran_high_t cospi_19_64 = 9760;
+static const tran_high_t cospi_20_64 = 9102;
+static const tran_high_t cospi_21_64 = 8423;
+static const tran_high_t cospi_22_64 = 7723;
+static const tran_high_t cospi_23_64 = 7005;
+static const tran_high_t cospi_24_64 = 6270;
+static const tran_high_t cospi_25_64 = 5520;
+static const tran_high_t cospi_26_64 = 4756;
+static const tran_high_t cospi_27_64 = 3981;
+static const tran_high_t cospi_28_64 = 3196;
+static const tran_high_t cospi_29_64 = 2404;
+static const tran_high_t cospi_30_64 = 1606;
+static const tran_high_t cospi_31_64 = 804;
+
+// sinpi_k_9 = round(16384 * sqrt(2) * sin(k*Pi/9) * 2 / 3)
+static const tran_high_t sinpi_1_9 = 5283;
+static const tran_high_t sinpi_2_9 = 9929;
+static const tran_high_t sinpi_3_9 = 13377;
+static const tran_high_t sinpi_4_9 = 15212;
+
+#endif  // VPX_DSP_TXFM_COMMON_H_
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -61,6 +61,8 @@
 DSP_SRCS-$(HAVE_SSE2)   += x86/highbd_loopfilter_sse2.c
 endif  # CONFIG_VP9_HIGHBITDEPTH
 
+DSP_SRCS-yes            += txfm_common.h
+DSP_SRCS-$(HAVE_SSE2)   += x86/txfm_common_sse2.h
 DSP_SRCS-$(HAVE_MSA)    += mips/txfm_macros_msa.h
 # forward transform
 ifeq ($(CONFIG_VP9_ENCODER),yes)
--- a/vpx_dsp/x86/fwd_txfm_impl_sse2.h
+++ b/vpx_dsp/x86/fwd_txfm_impl_sse2.h
@@ -11,8 +11,9 @@
 #include <emmintrin.h>  // SSE2
 
 #include "./vpx_dsp_rtcd.h"
-#include "vp9/common/vp9_idct.h"  // for cospi constants
 #include "vp9/encoder/x86/vp9_dct_sse2.h"
+#include "vpx_dsp/txfm_common.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
 #include "vpx_ports/mem.h"
 
 // TODO(jingning) The high bit-depth functions need rework for performance.
--- a/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
+++ b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
@@ -15,7 +15,6 @@
 #include "vpx_ports/mem.h"
 
 #if CONFIG_VP9_HIGHBITDEPTH
-// from vp9_idct.h: typedef int32_t tran_low_t;
 void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr,
                                 intptr_t count,
                                 int skip_block,
--- /dev/null
+++ b/vpx_dsp/x86/txfm_common_sse2.h
@@ -1,0 +1,29 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_X86_TXFM_COMMON_SSE2_H_
+#define VPX_DSP_X86_TXFM_COMMON_SSE2_H_
+
+#include <emmintrin.h>  // SSE2
+#include "vpx/vpx_integer.h"
+// pair_set_epi16: 16-bit lanes 0..7 = a, b, a, b, a, b, a, b (lane 0 lowest).
+#define pair_set_epi16(a, b) \
+  _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
+                (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a))
+// dual_set_epi16: 16-bit lanes 0..3 = a, lanes 4..7 = b.
+#define dual_set_epi16(a, b) \
+  _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \
+                (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a))
+// octa_set_epi16: 16-bit lanes 0..7 = a, b, c, d, e, f, g, h (setr order).
+#define octa_set_epi16(a, b, c, d, e, f, g, h) \
+  _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \
+                 (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h))
+
+#endif  // VPX_DSP_X86_TXFM_COMMON_SSE2_H_