ref: a672351af986725df4cbeae546c15fc85fb2e38c
parent: e40a769036a97403718244caa81b8680a4c9c7eb
author: John Koleszar <[email protected]>
date: Thu Apr 25 10:44:44 EDT 2013
quantize: make 4x4, 8x8 common with larger transforms

There were 4 variants of the quantize loop in vp9_quantize.c, now there is 1.

Change-Id: Ic853393411214b32d46a6ba53769413bd14e1cac
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -155,7 +155,8 @@
void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
- void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, int y_blocks);
+ void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
+ int y_blocks);
void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2,
int y_blocks);
void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -70,12 +70,12 @@
tx_type = get_tx_type_4x4(&x->e_mbd, ib);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
- vp9_ht_quantize_b_4x4(x, ib, tx_type);
+ x->quantize_b_4x4(x, ib, tx_type, 16);
vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
diff, 16, tx_type);
} else {
x->fwd_txm4x4(src_diff, coeff, 32);
- x->quantize_b_4x4(x, ib, 16);
+ x->quantize_b_4x4(x, ib, tx_type, 16);
vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[0].eobs[ib],
BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
diff, 32);
@@ -209,7 +209,7 @@
tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
- vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
+ x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
vp9_short_iht4x4(dqcoeff, diff, 16, tx_type);
} else if (!(i & 1) &&
get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
@@ -222,7 +222,7 @@
i++;
} else {
x->fwd_txm4x4(src_diff, coeff, 32);
- x->quantize_b_4x4(x, ib + iblock[i], 16);
+ x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]],
dqcoeff, diff, 32);
}
@@ -275,7 +275,7 @@
*(b->base_dst) + b->dst, b->dst_stride);
x->fwd_txm4x4(src_diff, coeff, 16);
- x->quantize_b_4x4(x, ib, 16);
+ x->quantize_b_4x4(x, ib, DCT_DCT, 16);
vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[plane].eobs[block],
dqcoeff, diff, 16);
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -26,210 +26,7 @@
plane == 1 ? 16 : 20;
}
-void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
- MACROBLOCKD *const xd = &mb->e_mbd;
- int i, rc, eob;
- int zbin;
- int x, y, z, sz;
- int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[0].coeff, b_idx, 16);
- // ht is luma-only
- int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[0].qcoeff, b_idx, 16);
- int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[0].dqcoeff, b_idx, 16);
- int16_t *zbin_boost_ptr = mb->plane[0].zrun_zbin_boost;
- int16_t *zbin_ptr = mb->plane[0].zbin;
- int16_t *round_ptr = mb->plane[0].round;
- int16_t *quant_ptr = mb->plane[0].quant;
- uint8_t *quant_shift_ptr = mb->plane[0].quant_shift;
- int16_t *dequant_ptr = xd->plane[0].dequant;
- int zbin_oq_value = mb->plane[0].zbin_extra;
- const int *pt_scan = get_scan_4x4(tx_type);
-
- vpx_memset(qcoeff_ptr, 0, 32);
- vpx_memset(dqcoeff_ptr, 0, 32);
-
- eob = -1;
-
- if (!mb->skip_block) {
- for (i = 0; i < 16; i++) {
- rc = pt_scan[i];
- z = coeff_ptr[rc];
-
- zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
- zbin_boost_ptr++;
-
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- if (x >= zbin) {
- x += round_ptr[rc];
- y = (((x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
-
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = mb->plane[0].zrun_zbin_boost; // reset zero run len
- }
- }
- }
- }
-
- xd->plane[0].eobs[b_idx] = eob + 1;
-}
-
-void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks) {
- MACROBLOCKD *const xd = &mb->e_mbd;
- const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
- const int c_idx = plane_idx(pb_idx.plane);
- int i, rc, eob;
- int zbin;
- int x, y, z, sz;
- int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
- pb_idx.block, 16);
- int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
- pb_idx.block, 16);
- int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff,
- pb_idx.block, 16);
- int16_t *zbin_boost_ptr = mb->plane[pb_idx.plane].zrun_zbin_boost;
- int16_t *zbin_ptr = mb->plane[pb_idx.plane].zbin;
- int16_t *round_ptr = mb->plane[pb_idx.plane].round;
- int16_t *quant_ptr = mb->plane[pb_idx.plane].quant;
- uint8_t *quant_shift_ptr = mb->plane[pb_idx.plane].quant_shift;
- int16_t *dequant_ptr = xd->plane[0].dequant;
- int zbin_oq_value = mb->plane[pb_idx.plane].zbin_extra;
-
- if (c_idx == 0) assert(pb_idx.plane == 0);
- if (c_idx == 16) assert(pb_idx.plane == 1);
- if (c_idx == 20) assert(pb_idx.plane == 2);
- vpx_memset(qcoeff_ptr, 0, 32);
- vpx_memset(dqcoeff_ptr, 0, 32);
-
- eob = -1;
-
- if (!mb->skip_block) {
- for (i = 0; i < 16; i++) {
- rc = vp9_default_zig_zag1d_4x4[i];
- z = coeff_ptr[rc];
-
- zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
- zbin_boost_ptr++;
-
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- if (x >= zbin) {
- x += round_ptr[rc];
-
- y = (((x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
-
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = mb->plane[pb_idx.plane].zrun_zbin_boost;
- }
- }
- }
- }
-
- xd->plane[pb_idx.plane].eobs[pb_idx.block] = eob + 1;
-}
-
-void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
- int y_blocks) {
- MACROBLOCKD *const xd = &mb->e_mbd;
- const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
- const int c_idx = plane_idx(pb_idx.plane);
- int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
- pb_idx.block, 16);
- int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff,
- pb_idx.block, 16);
- int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
- pb_idx.block, 16);
- const int *pt_scan = get_scan_8x8(tx_type);
-
- if (c_idx == 0) assert(pb_idx.plane == 0);
- if (c_idx == 16) assert(pb_idx.plane == 1);
- if (c_idx == 20) assert(pb_idx.plane == 2);
- vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t));
- vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t));
-
- if (!mb->skip_block) {
- int i, rc, eob;
- int zbin;
- int x, y, z, sz;
- int zero_run;
- int16_t *zbin_boost_ptr = mb->plane[pb_idx.plane].zrun_zbin_boost;
- int16_t *zbin_ptr = mb->plane[pb_idx.plane].zbin;
- int16_t *round_ptr = mb->plane[pb_idx.plane].round;
- int16_t *quant_ptr = mb->plane[pb_idx.plane].quant;
- uint8_t *quant_shift_ptr = mb->plane[pb_idx.plane].quant_shift;
- int16_t *dequant_ptr = xd->plane[pb_idx.plane].dequant;
- int zbin_oq_value = mb->plane[pb_idx.plane].zbin_extra;
-
- eob = -1;
-
- // Special case for DC as it is the one triggering access in various
- // tables: {zbin, quant, quant_shift, dequant}_ptr[rc != 0]
- {
- z = coeff_ptr[0];
- zbin = (zbin_ptr[0] + zbin_boost_ptr[0] + zbin_oq_value);
- zero_run = 1;
-
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- if (x >= zbin) {
- x += (round_ptr[0]);
- y = ((int)(((int)(x * quant_ptr[0]) >> 16) + x))
- >> quant_shift_ptr[0]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[0] = x; // write to destination
- dqcoeff_ptr[0] = x * dequant_ptr[0]; // dequantized value
-
- if (y) {
- eob = 0; // last nonzero coeffs
- zero_run = 0;
- }
- }
- }
- for (i = 1; i < 64; i++) {
- rc = pt_scan[i];
- z = coeff_ptr[rc];
- zbin = (zbin_ptr[1] + zbin_boost_ptr[zero_run] + zbin_oq_value);
- // The original code was incrementing zero_run while keeping it at
- // maximum 15 by adding "(zero_run < 15)". The same is achieved by
- // removing the opposite of the sign mask of "(zero_run - 15)".
- zero_run -= (zero_run - 15) >> 31;
-
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- if (x >= zbin) {
- x += (round_ptr[rc != 0]);
- y = ((int)(((int)(x * quant_ptr[1]) >> 16) + x))
- >> quant_shift_ptr[1]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[1]; // dequantized value
-
- if (y) {
- eob = i; // last nonzero coeffs
- zero_run = 0;
- }
- }
- }
- xd->plane[pb_idx.plane].eobs[pb_idx.block] = eob + 1;
- } else {
- xd->plane[pb_idx.plane].eobs[pb_idx.block] = 0;
- }
-}
-
-static void quantize(int16_t *zbin_boost_orig_ptr,
+static void quantize(int16_t *zbin_boost_orig_ptr,
int16_t *coeff_ptr, int n_coeffs, int skip_block,
int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr,
uint8_t *quant_shift_ptr,
@@ -278,16 +75,54 @@
*eob_ptr = eob + 1;
}
+void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
+ int y_blocks) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
+ const int *pt_scan = get_scan_4x4(tx_type);
+
+ quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
+ BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
+ 16, mb->skip_block,
+ mb->plane[pb_idx.plane].zbin,
+ mb->plane[pb_idx.plane].round,
+ mb->plane[pb_idx.plane].quant,
+ mb->plane[pb_idx.plane].quant_shift,
+ BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
+ BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
+ xd->plane[pb_idx.plane].dequant,
+ mb->plane[pb_idx.plane].zbin_extra,
+ &xd->plane[pb_idx.plane].eobs[pb_idx.block],
+ pt_scan, 1);
+}
+
+void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
+ int y_blocks) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
+ const int *pt_scan = get_scan_8x8(tx_type);
+
+ quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
+ BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
+ 64, mb->skip_block,
+ mb->plane[pb_idx.plane].zbin,
+ mb->plane[pb_idx.plane].round,
+ mb->plane[pb_idx.plane].quant,
+ mb->plane[pb_idx.plane].quant_shift,
+ BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
+ BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
+ xd->plane[pb_idx.plane].dequant,
+ mb->plane[pb_idx.plane].zbin_extra,
+ &xd->plane[pb_idx.plane].eobs[pb_idx.block],
+ pt_scan, 1);
+}
+
void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
int y_blocks) {
MACROBLOCKD *const xd = &mb->e_mbd;
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
- const int c_idx = plane_idx(pb_idx.plane);
const int *pt_scan = get_scan_16x16(tx_type);
- if (c_idx == 0) assert(pb_idx.plane == 0);
- if (c_idx == 16) assert(pb_idx.plane == 1);
- if (c_idx == 20) assert(pb_idx.plane == 2);
quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
256, mb->skip_block,
@@ -306,11 +141,7 @@
void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) {
MACROBLOCKD *const xd = &mb->e_mbd;
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
- const int c_idx = plane_idx(pb_idx.plane);
- if (c_idx == 0) assert(pb_idx.plane == 0);
- if (c_idx == 16) assert(pb_idx.plane == 1);
- if (c_idx == 20) assert(pb_idx.plane == 2);
quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
1024, mb->skip_block,
@@ -371,11 +202,7 @@
for (n = 0; n < bw * bh; n++) {
const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
- if (tx_type != DCT_DCT) {
- vp9_ht_quantize_b_4x4(x, n, tx_type);
- } else {
- x->quantize_b_4x4(x, n, bw * bh);
- }
+ x->quantize_b_4x4(x, n, tx_type, bw * bh);
}
}
@@ -412,7 +239,7 @@
int i;
for (i = uoff; i < ((uoff * 3) >> 1); i++)
- x->quantize_b_4x4(x, i, uoff);
+ x->quantize_b_4x4(x, i, DCT_DCT, uoff);
}
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
@@ -421,8 +248,8 @@
* of blocks. */
void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2,
int y_blocks) {
- vp9_regular_quantize_b_4x4(x, b_idx1, y_blocks);
- vp9_regular_quantize_b_4x4(x, b_idx2, y_blocks);
+ vp9_regular_quantize_b_4x4(x, b_idx1, DCT_DCT, y_blocks);
+ vp9_regular_quantize_b_4x4(x, b_idx2, DCT_DCT, y_blocks);
}
static void invert_quant(int16_t *quant, uint8_t *shift, int d) {
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -26,10 +26,10 @@
#include "x86/vp9_quantize_x86.h"
#endif
-void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_ix, TX_TYPE type);
-void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks);
void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2,
int y_blocks);
+void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
+ int y_blocks);
void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
int y_blocks);
void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -909,10 +909,10 @@
tx_type = get_tx_type_4x4(xd, ib);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
- vp9_ht_quantize_b_4x4(x, ib, tx_type);
+ x->quantize_b_4x4(x, ib, tx_type, 16);
} else {
x->fwd_txm4x4(src_diff, coeff, 32);
- x->quantize_b_4x4(x, ib, 16);
+ x->quantize_b_4x4(x, ib, tx_type, 16);
}
tempa = ta;
@@ -1167,7 +1167,7 @@
tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
- vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
+ x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
} else if (!(i & 1) &&
get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
x->fwd_txm8x4(src_diff, coeff, 32);
@@ -1175,7 +1175,7 @@
do_two = 1;
} else {
x->fwd_txm4x4(src_diff, coeff, 32);
- x->quantize_b_4x4(x, ib + iblock[i], 16);
+ x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
}
distortion += vp9_block_error_c(coeff,
BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[i], 16),
@@ -1756,7 +1756,7 @@
src, src_stride,
*(bd->base_dst) + bd->dst, bd->dst_stride);
x->fwd_txm4x4(src_diff, coeff, 32);
- x->quantize_b_4x4(x, i, 16);
+ x->quantize_b_4x4(x, i, DCT_DCT, 16);
thisdistortion = vp9_block_error(coeff,
BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16);
*distortion += thisdistortion;