shithub: libvpx

Download patch

ref: 9dba044be26705b15f76a406f42cb64751015b09
parent: a5db3967eaa7c06564b6faed613a9d243230f44e
parent: 3a0602578eb765e046ecb02f2118d0d4241b88d5
author: Dmitry Kovalev <[email protected]>
date: Sat Oct 5 19:44:05 EDT 2013

Merge "Giving consistent names to IDCT/IWHT functions."

--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -31,7 +31,7 @@
 }
 void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
                  int stride, int /*tx_type*/) {
-  vp9_short_idct4x4_add_c(out, dst, stride >> 1);
+  vp9_idct4x4_16_add_c(out, dst, stride >> 1);
 }
 void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
             int stride, int tx_type) {
--- a/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
@@ -8,7 +8,7 @@
 ;
 
 
-    EXPORT  |vp9_short_idct4x4_1_add_neon|
+    EXPORT  |vp9_idct4x4_1_add_neon|
     ARM
     REQUIRE8
     PRESERVE8
@@ -15,7 +15,7 @@
 
     AREA ||.text||, CODE, READONLY, ALIGN=2
 
-;void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
+;void vp9_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
 ;                                  int dest_stride)
 ;
 ; r0  int16_t input
@@ -22,7 +22,7 @@
 ; r1  uint8_t *dest
 ; r2  int dest_stride)
 
-|vp9_short_idct4x4_1_add_neon| PROC
+|vp9_idct4x4_1_add_neon| PROC
     ldrsh            r0, [r0]
 
     ; generate cospi_16_64 = 11585
@@ -63,6 +63,6 @@
     vst1.32          {d7[1]}, [r12]
 
     bx               lr
-    ENDP             ; |vp9_short_idct4x4_1_add_neon|
+    ENDP             ; |vp9_idct4x4_1_add_neon|
 
     END
--- a/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
@@ -8,7 +8,7 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-    EXPORT  |vp9_short_idct4x4_add_neon|
+    EXPORT  |vp9_idct4x4_16_add_neon|
     ARM
     REQUIRE8
     PRESERVE8
@@ -16,13 +16,13 @@
     AREA ||.text||, CODE, READONLY, ALIGN=2
 
     AREA     Block, CODE, READONLY ; name this block of code
-;void vp9_short_idct4x4_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
+;void vp9_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
 ;
 ; r0  int16_t input
 ; r1  uint8_t *dest
 ; r2  int dest_stride)
 
-|vp9_short_idct4x4_add_neon| PROC
+|vp9_idct4x4_16_add_neon| PROC
 
     ; The 2D transform is done with two passes which are actually pretty
     ; similar. We first transform the rows. This is done by transposing
@@ -185,6 +185,6 @@
     vst1.32 {d26[1]}, [r1], r2
     vst1.32 {d26[0]}, [r1]  ; no post-increment
     bx              lr
-    ENDP  ; |vp9_short_idct4x4_add_neon|
+    ENDP  ; |vp9_idct4x4_16_add_neon|
 
     END
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -18,7 +18,7 @@
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_idct.h"
 
-void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_iwht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
    0.5 shifts per pixel. */
   int i;
@@ -70,7 +70,7 @@
   }
 }
 
-void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
+void vp9_iwht4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
   int i;
   int a1, e1;
   int16_t tmp[4];
@@ -116,7 +116,7 @@
   output[3] = step[0] - step[3];
 }
 
-void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_idct4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
   int16_t out[4 * 4];
   int16_t *outptr = out;
   int i, j;
@@ -140,7 +140,7 @@
   }
 }
 
-void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
   int i;
   int a1;
   int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
@@ -1286,20 +1286,19 @@
 }
 
 // idct
-void vp9_idct_add(int16_t *input, uint8_t *dest, int stride, int eob) {
+void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) {
   if (eob > 1)
-    vp9_short_idct4x4_add(input, dest, stride);
+    vp9_idct4x4_16_add(input, dest, stride);
   else
-    vp9_short_idct4x4_1_add(input, dest, stride);
+    vp9_idct4x4_1_add(input, dest, stride);
 }
 
 
-void vp9_idct_add_lossless(int16_t *input, uint8_t *dest, int stride,
-                             int eob) {
+void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) {
   if (eob > 1)
-    vp9_short_iwalsh4x4_add(input, dest, stride);
+    vp9_iwht4x4_16_add(input, dest, stride);
   else
-    vp9_short_iwalsh4x4_1_add_c(input, dest, stride);
+    vp9_iwht4x4_1_add(input, dest, stride);
 }
 
 void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob) {
@@ -1348,7 +1347,7 @@
 void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
                    int eob) {
   if (tx_type == DCT_DCT)
-    vp9_idct_add(input, dest, stride, eob);
+    vp9_idct4x4_add(input, dest, stride, eob);
   else
     vp9_short_iht4x4_add(input, dest, stride, tx_type);
 }
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -88,9 +88,8 @@
 } transform_2d;
 
 
-void vp9_idct_add(int16_t *input, uint8_t *dest, int stride, int eob);
-void vp9_idct_add_lossless(int16_t *input, uint8_t *dest,
-                           int stride, int eob);
+void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
+void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
 void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob);
 void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob);
 void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob);
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -267,11 +267,11 @@
 #
 # dct
 #
-prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct4x4_1_add sse2 neon
+prototype void vp9_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_idct4x4_1_add sse2 neon
 
-prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct4x4_add sse2 neon
+prototype void vp9_idct4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_idct4x4_16_add sse2 neon
 
 prototype void vp9_short_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride"
 specialize vp9_short_idct8x8_1_add sse2 neon
@@ -310,11 +310,11 @@
 specialize vp9_idct4_1d sse2
 # dct and add
 
-prototype void vp9_short_iwalsh4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_iwalsh4x4_1_add
+prototype void vp9_iwht4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_iwht4x4_1_add
 
-prototype void vp9_short_iwalsh4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_iwalsh4x4_add
+prototype void vp9_iwht4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_iwht4x4_16_add
 
 #
 # Encoder functions below this point.
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -15,7 +15,7 @@
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_idct.h"
 
-void vp9_short_idct4x4_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i eight = _mm_set1_epi16(8);
   const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
@@ -148,7 +148,7 @@
   RECON_AND_STORE4X4(dest, input3);
 }
 
-void vp9_short_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
   __m128i dc_value;
   const __m128i zero = _mm_setzero_si128();
   int a;
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -490,8 +490,7 @@
                  cm->uv_dc_delta_q == 0 &&
                  cm->uv_ac_delta_q == 0;
 
-  xd->itxm_add = xd->lossless ? vp9_idct_add_lossless
-                              : vp9_idct_add;
+  xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
 }
 
 static INTERPOLATIONFILTERTYPE read_interp_filter_type(
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1866,8 +1866,8 @@
     // printf("Switching to lossless\n");
     cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
     cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
-    cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_iwalsh4x4_1_add;
-    cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_iwalsh4x4_add;
+    cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_iwht4x4_1_add;
+    cpi->mb.e_mbd.inv_txm4x4_add = vp9_iwht4x4_16_add;
     cpi->mb.optimize = 0;
     cpi->common.lf.filter_level = 0;
     cpi->zbin_mode_boost_enabled = 0;
@@ -1876,8 +1876,8 @@
     // printf("Not lossless\n");
     cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
     cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
-    cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_idct4x4_1_add;
-    cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_idct4x4_add;
+    cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_idct4x4_1_add;
+    cpi->mb.e_mbd.inv_txm4x4_add = vp9_idct4x4_16_add;
   }
 }
 
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1261,11 +1261,11 @@
 
   cpi->oxcf.lossless = oxcf->lossless;
   if (cpi->oxcf.lossless) {
-    cpi->mb.e_mbd.inv_txm4x4_1_add    = vp9_short_iwalsh4x4_1_add;
-    cpi->mb.e_mbd.inv_txm4x4_add      = vp9_short_iwalsh4x4_add;
+    cpi->mb.e_mbd.inv_txm4x4_1_add    = vp9_iwht4x4_1_add;
+    cpi->mb.e_mbd.inv_txm4x4_add      = vp9_iwht4x4_16_add;
   } else {
-    cpi->mb.e_mbd.inv_txm4x4_1_add    = vp9_short_idct4x4_1_add;
-    cpi->mb.e_mbd.inv_txm4x4_add      = vp9_short_idct4x4_add;
+    cpi->mb.e_mbd.inv_txm4x4_1_add    = vp9_idct4x4_1_add;
+    cpi->mb.e_mbd.inv_txm4x4_add      = vp9_idct4x4_16_add;
   }
 
   cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL;