ref: 9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25
parent: c9af3de1557a6c99fada8b9b608ef14ba8f24a85
parent: 190c2b4591039a2acef4964581e0d24d82de0d61
author: Dmitry Kovalev <[email protected]>
date: Tue Oct 22 09:05:24 EDT 2013
Merge "Using stride (# of elements) instead of pitch (bytes) in fdct4x4."
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -31,15 +31,15 @@
}
void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int /*tx_type*/) {
- vp9_idct4x4_16_add_c(out, dst, stride >> 1);
+ vp9_idct4x4_16_add_c(out, dst, stride);
}
void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int tx_type) {
- vp9_short_fht4x4_c(in, out, stride >> 1, tx_type);
+ vp9_short_fht4x4_c(in, out, stride, tx_type);
}
void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
- vp9_iht4x4_16_add_c(out, dst, stride >> 1, tx_type);
+ vp9_iht4x4_16_add_c(out, dst, stride, tx_type);
}
class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
@@ -78,7 +78,7 @@
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);
- const int pitch = 8;
+ const int pitch = 4;
int count_sign_block[16][2];
const int count_test_block = 1000000;
@@ -152,7 +152,7 @@
for (int j = 0; j < 16; ++j)
test_input_block[j] = src[j] - dst[j];
- const int pitch = 8;
+ const int pitch = 4;
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
for (int j = 0; j < 16; ++j) {
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -698,7 +698,7 @@
prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int stride"
specialize vp9_short_fdct8x8 sse2
-prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
+prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int stride"
specialize vp9_short_fdct4x4 sse2
prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int stride"
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -36,7 +36,7 @@
output[3] = dct_const_round_shift(temp2);
}
-void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
@@ -43,7 +43,6 @@
// as the first pass results are transposed, we transpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
- const int stride = pitch >> 1;
int pass;
// We need an intermediate buffer between passes.
int16_t intermediate[4 * 4];
@@ -586,18 +585,17 @@
/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
pixel. */
-void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int stride) {
int i;
int a1, b1, c1, d1, e1;
int16_t *ip = input;
int16_t *op = output;
- int pitch_short = pitch >> 1;
for (i = 0; i < 4; i++) {
- a1 = ip[0 * pitch_short];
- b1 = ip[1 * pitch_short];
- c1 = ip[2 * pitch_short];
- d1 = ip[3 * pitch_short];
+ a1 = ip[0 * stride];
+ b1 = ip[1 * stride];
+ c1 = ip[2 * stride];
+ d1 = ip[3 * stride];
a1 += b1;
d1 = d1 - c1;
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -402,7 +402,7 @@
xoff = 4 * (block & twmask);
yoff = 4 * (block >> twl);
src_diff = p->src_diff + 4 * bw * yoff + xoff;
- x->fwd_txm4x4(src_diff, coeff, bw * 8);
+ x->fwd_txm4x4(src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
@@ -612,7 +612,7 @@
if (tx_type != DCT_DCT)
vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
else
- x->fwd_txm4x4(src_diff, coeff, bw * 8);
+ x->fwd_txm4x4(src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -959,9 +959,9 @@
sf->optimize_coefficients = 0;
}
- cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
+ cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
- cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
+ cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
}
cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4;
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1089,7 +1089,7 @@
vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
x->quantize_b_4x4(x, block, tx_type, 16);
} else {
- x->fwd_txm4x4(src_diff, coeff, 16);
+ x->fwd_txm4x4(src_diff, coeff, 8);
x->quantize_b_4x4(x, block, tx_type, 16);
}
@@ -1563,7 +1563,7 @@
k += (idy * 2 + idx);
coeff = BLOCK_OFFSET(p->coeff, k);
x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
- coeff, 16);
+ coeff, 8);
x->quantize_b_4x4(x, k, DCT_DCT, 16);
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
16, &ssz);
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -12,7 +12,7 @@
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vpx_ports/mem.h"
-void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
@@ -19,7 +19,6 @@
// as the first pass results are transposed, we transpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
- const int stride = pitch >> 1;
int pass;
// Constants
// When we use them, in one case, they are all the same. In all others