shithub: libvpx

--- a/test/fdct4x4_test.cc

+++ b/test/fdct4x4_test.cc

@@ -31,15 +31,15 @@

 void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,

                  int stride, int /*tx_type*/) {

-  vp9_idct4x4_16_add_c(out, dst, stride >> 1);

+  vp9_idct4x4_16_add_c(out, dst, stride);

 void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,

             int stride, int tx_type) {

-  vp9_short_fht4x4_c(in, out, stride >> 1, tx_type);

+  vp9_short_fht4x4_c(in, out, stride, tx_type);

 void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,

                 int stride, int tx_type) {

-  vp9_iht4x4_16_add_c(out, dst, stride >> 1, tx_type);

+  vp9_iht4x4_16_add_c(out, dst, stride, tx_type);

 class FwdTrans4x4Test : public ::testing::TestWithParam<int> {

@@ -78,7 +78,7 @@

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);

   DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);

-  const int pitch = 8;

+  const int pitch = 4;

   int count_sign_block[16][2];

   const int count_test_block = 1000000;

@@ -152,7 +152,7 @@

     for (int j = 0; j < 16; ++j)

       test_input_block[j] = src[j] - dst[j];

-    const int pitch = 8;

+    const int pitch = 4;

     RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);

     for (int j = 0; j < 16; ++j) {

--- a/vp9/common/vp9_rtcd_defs.sh

+++ b/vp9/common/vp9_rtcd_defs.sh

@@ -698,7 +698,7 @@

 prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int stride"

 specialize vp9_short_fdct8x8 sse2

-prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"

+prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int stride"

 specialize vp9_short_fdct4x4 sse2

 prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int stride"

--- a/vp9/encoder/vp9_dct.c

+++ b/vp9/encoder/vp9_dct.c

@@ -36,7 +36,7 @@

   output[3] = dct_const_round_shift(temp2);

-void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {

+void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int stride) {

   // The 2D transform is done with two passes which are actually pretty

   // similar. In the first one, we transform the columns and transpose

   // the results. In the second one, we transform the rows. To achieve that,

@@ -43,7 +43,6 @@

   // as the first pass results are transposed, we transpose the columns (that

   // is the transposed rows) and transpose the results (so that it goes back

   // in normal/row positions).

-  const int stride = pitch >> 1;

   int pass;

   // We need an intermediate buffer between passes.

   int16_t intermediate[4 * 4];

@@ -586,18 +585,17 @@

 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per

    pixel. */

-void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int pitch) {

+void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int stride) {

   int i;

   int a1, b1, c1, d1, e1;

   int16_t *ip = input;

   int16_t *op = output;

-  int pitch_short = pitch >> 1;

   for (i = 0; i < 4; i++) {

-    a1 = ip[0 * pitch_short];

-    b1 = ip[1 * pitch_short];

-    c1 = ip[2 * pitch_short];

-    d1 = ip[3 * pitch_short];

+    a1 = ip[0 * stride];

+    b1 = ip[1 * stride];

+    c1 = ip[2 * stride];

+    d1 = ip[3 * stride];

     a1 += b1;

     d1 = d1 - c1;

--- a/vp9/encoder/vp9_encodemb.c

+++ b/vp9/encoder/vp9_encodemb.c

@@ -402,7 +402,7 @@

       xoff = 4 * (block & twmask);

       yoff = 4 * (block >> twl);

       src_diff = p->src_diff + 4 * bw * yoff + xoff;

-      x->fwd_txm4x4(src_diff, coeff, bw * 8);

+      x->fwd_txm4x4(src_diff, coeff, bw * 4);

       vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,

                      p->quant, p->quant_shift, qcoeff, dqcoeff,

                      pd->dequant, p->zbin_extra, eob, scan, iscan);

@@ -612,7 +612,7 @@

       if (tx_type != DCT_DCT)

         vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);

       else

-        x->fwd_txm4x4(src_diff, coeff, bw * 8);

+        x->fwd_txm4x4(src_diff, coeff, bw * 4);

       vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,

                      p->quant_shift, qcoeff, dqcoeff,

                      pd->dequant, p->zbin_extra, eob, scan, iscan);

--- a/vp9/encoder/vp9_onyx_if.c

+++ b/vp9/encoder/vp9_onyx_if.c

@@ -959,9 +959,9 @@

     sf->optimize_coefficients = 0;

-  cpi->mb.fwd_txm4x4    = vp9_short_fdct4x4;

+  cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;

   if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {

-    cpi->mb.fwd_txm4x4    = vp9_short_walsh4x4;

+    cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;

   cpi->mb.quantize_b_4x4      = vp9_regular_quantize_b_4x4;

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -1089,7 +1089,7 @@

           vp9_short_fht4x4(src_diff, coeff, 8, tx_type);

           x->quantize_b_4x4(x, block, tx_type, 16);

         } else {

-          x->fwd_txm4x4(src_diff, coeff, 16);

+          x->fwd_txm4x4(src_diff, coeff, 8);

           x->quantize_b_4x4(x, block, tx_type, 16);

@@ -1563,7 +1563,7 @@

       k += (idy * 2 + idx);

       coeff = BLOCK_OFFSET(p->coeff, k);

       x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),

-                    coeff, 16);

+                    coeff, 8);

       x->quantize_b_4x4(x, k, DCT_DCT, 16);

       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),

                                         16, &ssz);

--- a/vp9/encoder/x86/vp9_dct_sse2.c

+++ b/vp9/encoder/x86/vp9_dct_sse2.c

@@ -12,7 +12,7 @@

 #include "vp9/common/vp9_idct.h"  // for cospi constants

 #include "vpx_ports/mem.h"

-void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) {

+void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int stride) {

   // The 2D transform is done with two passes which are actually pretty

   // similar. In the first one, we transform the columns and transpose

   // the results. In the second one, we transform the rows. To achieve that,

@@ -19,7 +19,6 @@

   // as the first pass results are transposed, we transpose the columns (that

   // is the transposed rows) and transpose the results (so that it goes back

   // in normal/row positions).

-  const int stride = pitch >> 1;

   int pass;

   // Constants

   //    When we use them, in one case, they are all the same. In all others