ref: 1aa7fd5aefa56453b4fa0136905d481c35b23ed5
parent: eaf2d4cbfc80cd46fbc29a3a892910dfebcfedf4
author: Dmitry Kovalev <[email protected]>
date: Fri Oct 18 07:49:33 EDT 2013
Use stride (# of elements) instead of pitch (bytes) in fdct16x16. This makes fdct consistent with the iht/idct/fht functions, which all take stride (# of elements) as their input argument. Change-Id: I2d95fdcbba96aaa0ed24a80870cb38f53487a97d
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -395,8 +395,7 @@
for (int j = 0; j < kNumCoeffs; ++j)
coeff[j] = round(out_r[j]);
- const int pitch = 32;
- REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch));
+ REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
for (int j = 0; j < kNumCoeffs; ++j) {
const uint32_t diff = dst[j] - src[j];
@@ -421,7 +420,7 @@
fwd_txfm_ = GET_PARAM(0);
inv_txfm_ = GET_PARAM(1);
tx_type_ = GET_PARAM(2);
- pitch_ = 32;
+ pitch_ = 16;
fwd_txfm_ref = fdct16x16_ref;
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
@@ -431,7 +430,7 @@
fwd_txfm_(in, out, stride);
}
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
- inv_txfm_(out, dst, stride >> 1);
+ inv_txfm_(out, dst, stride);
}
fdct_t fwd_txfm_;
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -707,7 +707,7 @@
prototype void vp9_short_fdct32x32_rd "int16_t *InputData, int16_t *OutputData, int stride"
specialize vp9_short_fdct32x32_rd sse2
-prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int pitch"
+prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int stride"
specialize vp9_short_fdct16x16 sse2
prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -302,7 +302,7 @@
}
}
-void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
@@ -309,7 +309,6 @@
// as the first pass results are transposed, we tranpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
- const int stride = pitch >> 1;
int pass;
// We need an intermediate buffer between passes.
int16_t intermediate[256];
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -379,7 +379,7 @@
xoff = 16 * (block & twmask);
yoff = 16 * (block >> twl);
src_diff = p->src_diff + 4 * bw * yoff + xoff;
- vp9_short_fdct16x16(src_diff, coeff, bw * 8);
+ vp9_short_fdct16x16(src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
@@ -559,7 +559,7 @@
if (tx_type != DCT_DCT)
vp9_short_fht16x16(src_diff, coeff, bw * 4, tx_type);
else
- vp9_short_fdct16x16(src_diff, coeff, bw * 8);
+ vp9_short_fdct16x16(src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -1056,7 +1056,7 @@
write_buffer_8x8(output, in, 8);
}
-void vp9_short_fdct16x16_sse2(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_fdct16x16_sse2(int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
@@ -1063,7 +1063,6 @@
// as the first pass results are transposed, we tranpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
- const int stride = pitch >> 1;
int pass;
// We need an intermediate buffer between passes.
DECLARE_ALIGNED_ARRAY(16, int16_t, intermediate, 256);