ref: 79a00d71bd3619e9d51aeb4472856026b73c3d1b
parent: 2fdb63fd727a8ba46e5acdf1ba388c8491b57f71
author: Johann <[email protected]>
date: Tue Aug 18 07:43:40 EDT 2015
Rename vp8 quantize.c

Move it to vp8_quantize.c and make sure to use the full path for all
vp8 includes of quantize.h.

Change-Id: I284651ff681707385f4924ea7db1541905c1624a
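Why the full paths matter: a bare #include "quantize.h" is resolved against the
compiler's include search path, where more than one quantize.h can be found in
this tree, so spelling out vp8/encoder/quantize.h removes the ambiguity.
Presumably the .c rename also keeps the object basename from colliding with the
other quantize.c sources (e.g. vpx_dsp/quantize.c) when everything is built into
one library; the commit message does not say so explicitly. The two forms side
by side:

    /* Before: picks whichever quantize.h the include path finds first. */
    #include "quantize.h"

    /* After: always the vp8 encoder header, rooted at the source tree. */
    #include "vp8/encoder/quantize.h"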
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -12,7 +12,7 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
#include "vp8/common/reconintra4x4.h"
#include "encodemb.h"
#include "vp8/common/invtrans.h"
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -14,7 +14,7 @@
#include "vp8_rtcd.h"
#include "encodemb.h"
#include "vp8/common/reconinter.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
#include "tokenize.h"
#include "vp8/common/invtrans.h"
#include "vpx_mem/vpx_mem.h"
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -17,7 +17,7 @@
#include "vp8/common/blockd.h"
#include "onyx_int.h"
#include "vp8/common/systemdependent.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
#include "vp8/common/alloccommon.h"
#include "mcomp.h"
#include "firstpass.h"
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -20,7 +20,7 @@
#include "vp8/common/onyxc_int.h"
#include "vpx_dsp/variance.h"
#include "encodemb.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
#include "vp8/common/entropy.h"
#include "vp8/common/threading.h"
#include "vpx_ports/mem.h"
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -13,7 +13,7 @@
#include "./vpx_scale_rtcd.h"
#include "vp8/common/onyxc_int.h"
#include "onyx_int.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/alloccommon.h"
--- a/vp8/encoder/quantize.c
+++ /dev/null
@@ -1,583 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <math.h>
-#include "vpx_mem/vpx_mem.h"
-
-#include "onyx_int.h"
-#include "quantize.h"
-#include "vp8/common/quant_common.h"
-
-void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
-{
- int i, rc, eob;
- int x, y, z, sz;
- short *coeff_ptr = b->coeff;
- short *round_ptr = b->round;
- short *quant_ptr = b->quant_fast;
- short *qcoeff_ptr = d->qcoeff;
- short *dqcoeff_ptr = d->dqcoeff;
- short *dequant_ptr = d->dequant;
-
- eob = -1;
- for (i = 0; i < 16; i++)
- {
- rc = vp8_default_zig_zag1d[i];
- z = coeff_ptr[rc];
-
- sz = (z >> 31); /* sign of z */
- x = (z ^ sz) - sz; /* x = abs(z) */
-
- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */
- x = (y ^ sz) - sz; /* get the sign back */
- qcoeff_ptr[rc] = x; /* write to destination */
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */
-
- if (y)
- {
- eob = i; /* last nonzero coeffs */
- }
- }
- *d->eob = (char)(eob + 1);
-}
-
-void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
-{
- int i, rc, eob;
- int zbin;
- int x, y, z, sz;
- short *zbin_boost_ptr = b->zrun_zbin_boost;
- short *coeff_ptr = b->coeff;
- short *zbin_ptr = b->zbin;
- short *round_ptr = b->round;
- short *quant_ptr = b->quant;
- short *quant_shift_ptr = b->quant_shift;
- short *qcoeff_ptr = d->qcoeff;
- short *dqcoeff_ptr = d->dqcoeff;
- short *dequant_ptr = d->dequant;
- short zbin_oq_value = b->zbin_extra;
-
- memset(qcoeff_ptr, 0, 32);
- memset(dqcoeff_ptr, 0, 32);
-
- eob = -1;
-
- for (i = 0; i < 16; i++)
- {
- rc = vp8_default_zig_zag1d[i];
- z = coeff_ptr[rc];
-
- zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
-
- zbin_boost_ptr ++;
- sz = (z >> 31); /* sign of z */
- x = (z ^ sz) - sz; /* x = abs(z) */
-
- if (x >= zbin)
- {
- x += round_ptr[rc];
- y = ((((x * quant_ptr[rc]) >> 16) + x)
- * quant_shift_ptr[rc]) >> 16; /* quantize (x) */
- x = (y ^ sz) - sz; /* get the sign back */
- qcoeff_ptr[rc] = x; /* write to destination */
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */
-
- if (y)
- {
- eob = i; /* last nonzero coeffs */
- zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */
- }
- }
- }
-
- *d->eob = (char)(eob + 1);
-}
-
-void vp8_quantize_mby(MACROBLOCK *x)
-{
- int i;
- int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
- && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
-
- for (i = 0; i < 16; i++)
- x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-
- if(has_2nd_order)
- x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
-}
-
-void vp8_quantize_mb(MACROBLOCK *x)
-{
- int i;
- int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
- && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
-
- for (i = 0; i < 24+has_2nd_order; i++)
- x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-}
-
-
-void vp8_quantize_mbuv(MACROBLOCK *x)
-{
- int i;
-
- for (i = 16; i < 24; i++)
- x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-}
-
-static const int qrounding_factors[129] =
-{
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48
-};
-
-
-static const int qzbin_factors[129] =
-{
- 84, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80
-};
-
-
-static const int qrounding_factors_y2[129] =
-{
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48
-};
-
-
-static const int qzbin_factors_y2[129] =
-{
- 84, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80
-};
-
-
-static void invert_quant(int improved_quant, short *quant,
- short *shift, short d)
-{
- if(improved_quant)
- {
- unsigned t;
- int l;
- t = d;
- for(l = 0; t > 1; l++)
- t>>=1;
- t = 1 + (1<<(16+l))/d;
- *quant = (short)(t - (1<<16));
- *shift = l;
- /* use multiplication and constant shift by 16 */
- *shift = 1 << (16 - *shift);
- }
- else
- {
- *quant = (1 << 16) / d;
- *shift = 0;
- /* use multiplication and constant shift by 16 */
- *shift = 1 << (16 - *shift);
- }
-}
-
-
-void vp8cx_init_quantizer(VP8_COMP *cpi)
-{
- int i;
- int quant_val;
- int Q;
-
- int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44,
- 44, 44};
-
- for (Q = 0; Q < QINDEX_RANGE; Q++)
- {
- /* dc values */
- quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
- cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
- invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
- cpi->Y1quant_shift[Q] + 0, quant_val);
- cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
- cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.Y1dequant[Q][0] = quant_val;
- cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
-
- quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
- cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
- invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
- cpi->Y2quant_shift[Q] + 0, quant_val);
- cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
- cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
- cpi->common.Y2dequant[Q][0] = quant_val;
- cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
-
- quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
- cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
- invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
- cpi->UVquant_shift[Q] + 0, quant_val);
- cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
- cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.UVdequant[Q][0] = quant_val;
- cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
-
- /* all the ac values = ; */
- quant_val = vp8_ac_yquant(Q);
- cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val;
- invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1,
- cpi->Y1quant_shift[Q] + 1, quant_val);
- cpi->Y1zbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
- cpi->Y1round[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.Y1dequant[Q][1] = quant_val;
- cpi->zrun_zbin_boost_y1[Q][1] = (quant_val * zbin_boost[1]) >> 7;
-
- quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
- cpi->Y2quant_fast[Q][1] = (1 << 16) / quant_val;
- invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 1,
- cpi->Y2quant_shift[Q] + 1, quant_val);
- cpi->Y2zbin[Q][1] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
- cpi->Y2round[Q][1] = (qrounding_factors_y2[Q] * quant_val) >> 7;
- cpi->common.Y2dequant[Q][1] = quant_val;
- cpi->zrun_zbin_boost_y2[Q][1] = (quant_val * zbin_boost[1]) >> 7;
-
- quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
- cpi->UVquant_fast[Q][1] = (1 << 16) / quant_val;
- invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 1,
- cpi->UVquant_shift[Q] + 1, quant_val);
- cpi->UVzbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
- cpi->UVround[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.UVdequant[Q][1] = quant_val;
- cpi->zrun_zbin_boost_uv[Q][1] = (quant_val * zbin_boost[1]) >> 7;
-
- for (i = 2; i < 16; i++)
- {
- cpi->Y1quant_fast[Q][i] = cpi->Y1quant_fast[Q][1];
- cpi->Y1quant[Q][i] = cpi->Y1quant[Q][1];
- cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];
- cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];
- cpi->Y1round[Q][i] = cpi->Y1round[Q][1];
- cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *
- zbin_boost[i]) >> 7;
-
- cpi->Y2quant_fast[Q][i] = cpi->Y2quant_fast[Q][1];
- cpi->Y2quant[Q][i] = cpi->Y2quant[Q][1];
- cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];
- cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];
- cpi->Y2round[Q][i] = cpi->Y2round[Q][1];
- cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *
- zbin_boost[i]) >> 7;
-
- cpi->UVquant_fast[Q][i] = cpi->UVquant_fast[Q][1];
- cpi->UVquant[Q][i] = cpi->UVquant[Q][1];
- cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];
- cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];
- cpi->UVround[Q][i] = cpi->UVround[Q][1];
- cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *
- zbin_boost[i]) >> 7;
- }
- }
-}
-
-#define ZBIN_EXTRA_Y \
- (( cpi->common.Y1dequant[QIndex][1] * \
- ( x->zbin_over_quant + \
- x->zbin_mode_boost + \
- x->act_zbin_adj ) ) >> 7)
-
-#define ZBIN_EXTRA_UV \
- (( cpi->common.UVdequant[QIndex][1] * \
- ( x->zbin_over_quant + \
- x->zbin_mode_boost + \
- x->act_zbin_adj ) ) >> 7)
-
-#define ZBIN_EXTRA_Y2 \
- (( cpi->common.Y2dequant[QIndex][1] * \
- ( (x->zbin_over_quant / 2) + \
- x->zbin_mode_boost + \
- x->act_zbin_adj ) ) >> 7)
-
-void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
-{
- int i;
- int QIndex;
- MACROBLOCKD *xd = &x->e_mbd;
- int zbin_extra;
-
- /* Select the baseline MB Q index. */
- if (xd->segmentation_enabled)
- {
- /* Abs Value */
- if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
- QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
- /* Delta Value */
- else
- {
- QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
- /* Clamp to valid range */
- QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;
- }
- }
- else
- QIndex = cpi->common.base_qindex;
-
- /* This initialization should be called at least once. Use ok_to_skip to
- * decide if it is ok to skip.
- * Before encoding a frame, this function is always called with ok_to_skip
- * =0, which means no skiping of calculations. The "last" values are
- * initialized at that time.
- */
- if (!ok_to_skip || QIndex != x->q_index)
- {
-
- xd->dequant_y1_dc[0] = 1;
- xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0];
- xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0];
- xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0];
-
- for (i = 1; i < 16; i++)
- {
- xd->dequant_y1_dc[i] =
- xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1];
- xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1];
- xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1];
- }
-#if 1
- /*TODO: Remove dequant from BLOCKD. This is a temporary solution until
- * the quantizer code uses a passed in pointer to the dequant constants.
- * This will also require modifications to the x86 and neon assembly.
- * */
- for (i = 0; i < 16; i++)
- x->e_mbd.block[i].dequant = xd->dequant_y1;
- for (i = 16; i < 24; i++)
- x->e_mbd.block[i].dequant = xd->dequant_uv;
- x->e_mbd.block[24].dequant = xd->dequant_y2;
-#endif
-
- /* Y */
- zbin_extra = ZBIN_EXTRA_Y;
-
- for (i = 0; i < 16; i++)
- {
- x->block[i].quant = cpi->Y1quant[QIndex];
- x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
- x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
- x->block[i].zbin = cpi->Y1zbin[QIndex];
- x->block[i].round = cpi->Y1round[QIndex];
- x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
- x->block[i].zbin_extra = (short)zbin_extra;
- }
-
- /* UV */
- zbin_extra = ZBIN_EXTRA_UV;
-
- for (i = 16; i < 24; i++)
- {
- x->block[i].quant = cpi->UVquant[QIndex];
- x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
- x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
- x->block[i].zbin = cpi->UVzbin[QIndex];
- x->block[i].round = cpi->UVround[QIndex];
- x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
- x->block[i].zbin_extra = (short)zbin_extra;
- }
-
- /* Y2 */
- zbin_extra = ZBIN_EXTRA_Y2;
-
- x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
- x->block[24].quant = cpi->Y2quant[QIndex];
- x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
- x->block[24].zbin = cpi->Y2zbin[QIndex];
- x->block[24].round = cpi->Y2round[QIndex];
- x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
- x->block[24].zbin_extra = (short)zbin_extra;
-
- /* save this macroblock QIndex for vp8_update_zbin_extra() */
- x->q_index = QIndex;
-
- x->last_zbin_over_quant = x->zbin_over_quant;
- x->last_zbin_mode_boost = x->zbin_mode_boost;
- x->last_act_zbin_adj = x->act_zbin_adj;
-
-
-
- }
- else if(x->last_zbin_over_quant != x->zbin_over_quant
- || x->last_zbin_mode_boost != x->zbin_mode_boost
- || x->last_act_zbin_adj != x->act_zbin_adj)
- {
- /* Y */
- zbin_extra = ZBIN_EXTRA_Y;
-
- for (i = 0; i < 16; i++)
- x->block[i].zbin_extra = (short)zbin_extra;
-
- /* UV */
- zbin_extra = ZBIN_EXTRA_UV;
-
- for (i = 16; i < 24; i++)
- x->block[i].zbin_extra = (short)zbin_extra;
-
- /* Y2 */
- zbin_extra = ZBIN_EXTRA_Y2;
- x->block[24].zbin_extra = (short)zbin_extra;
-
- x->last_zbin_over_quant = x->zbin_over_quant;
- x->last_zbin_mode_boost = x->zbin_mode_boost;
- x->last_act_zbin_adj = x->act_zbin_adj;
- }
-}
-
-void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
-{
- int i;
- int QIndex = x->q_index;
- int zbin_extra;
-
- /* Y */
- zbin_extra = ZBIN_EXTRA_Y;
-
- for (i = 0; i < 16; i++)
- x->block[i].zbin_extra = (short)zbin_extra;
-
- /* UV */
- zbin_extra = ZBIN_EXTRA_UV;
-
- for (i = 16; i < 24; i++)
- x->block[i].zbin_extra = (short)zbin_extra;
-
- /* Y2 */
- zbin_extra = ZBIN_EXTRA_Y2;
- x->block[24].zbin_extra = (short)zbin_extra;
-}
-#undef ZBIN_EXTRA_Y
-#undef ZBIN_EXTRA_UV
-#undef ZBIN_EXTRA_Y2
-
-void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
-{
- /* Clear Zbin mode boost for default case */
- cpi->mb.zbin_mode_boost = 0;
-
- /* MB level quantizer setup */
- vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0);
-}
-
-
-void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
-{
- VP8_COMMON *cm = &cpi->common;
- MACROBLOCKD *mbd = &cpi->mb.e_mbd;
- int update = 0;
- int new_delta_q;
- int new_uv_delta_q;
- cm->base_qindex = Q;
-
- /* if any of the delta_q values are changing update flag has to be set */
- /* currently only y2dc_delta_q may change */
-
- cm->y1dc_delta_q = 0;
- cm->y2ac_delta_q = 0;
-
- if (Q < 4)
- {
- new_delta_q = 4-Q;
- }
- else
- new_delta_q = 0;
-
- update |= cm->y2dc_delta_q != new_delta_q;
- cm->y2dc_delta_q = new_delta_q;
-
- new_uv_delta_q = 0;
- // For screen content, lower the q value for UV channel. For now, select
- // conservative delta; same delta for dc and ac, and decrease it with lower
- // Q, and set to 0 below some threshold. May want to condition this in
- // future on the variance/energy in UV channel.
- if (cpi->oxcf.screen_content_mode && Q > 40) {
- new_uv_delta_q = -(int)(0.15 * Q);
- // Check range: magnitude of delta is 4 bits.
- if (new_uv_delta_q < -15) {
- new_uv_delta_q = -15;
- }
- }
- update |= cm->uvdc_delta_q != new_uv_delta_q;
- cm->uvdc_delta_q = new_uv_delta_q;
- cm->uvac_delta_q = new_uv_delta_q;
-
- /* Set Segment specific quatizers */
- mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];
- mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1];
- mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2];
- mbd->segment_feature_data[MB_LVL_ALT_Q][3] = cpi->segment_feature_data[MB_LVL_ALT_Q][3];
-
- /* quantizer has to be reinitialized for any delta_q changes */
- if(update)
- vp8cx_init_quantizer(cpi);
-
-}
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -28,7 +28,7 @@
#include "vp8/common/findnearmv.h"
#include "vp8/common/quant_common.h"
#include "encodemb.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
#include "vpx_dsp/variance.h"
#include "mcomp.h"
#include "rdopt.h"
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -12,7 +12,7 @@
#include "vp8/common/onyxc_int.h"
#include "onyx_int.h"
#include "vp8/common/systemdependent.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
#include "vp8/common/alloccommon.h"
#include "mcomp.h"
#include "firstpass.h"
--- /dev/null
+++ b/vp8/encoder/vp8_quantize.c
@@ -0,0 +1,583 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <math.h>
+#include "vpx_mem/vpx_mem.h"
+
+#include "onyx_int.h"
+#include "vp8/encoder/quantize.h"
+#include "vp8/common/quant_common.h"
+
+void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+ int i, rc, eob;
+ int x, y, z, sz;
+ short *coeff_ptr = b->coeff;
+ short *round_ptr = b->round;
+ short *quant_ptr = b->quant_fast;
+ short *qcoeff_ptr = d->qcoeff;
+ short *dqcoeff_ptr = d->dqcoeff;
+ short *dequant_ptr = d->dequant;
+
+ eob = -1;
+ for (i = 0; i < 16; i++)
+ {
+ rc = vp8_default_zig_zag1d[i];
+ z = coeff_ptr[rc];
+
+ sz = (z >> 31); /* sign of z */
+ x = (z ^ sz) - sz; /* x = abs(z) */
+
+ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */
+ x = (y ^ sz) - sz; /* get the sign back */
+ qcoeff_ptr[rc] = x; /* write to destination */
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */
+
+ if (y)
+ {
+ eob = i; /* last nonzero coeffs */
+ }
+ }
+ *d->eob = (char)(eob + 1);
+}
+
+void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+ int i, rc, eob;
+ int zbin;
+ int x, y, z, sz;
+ short *zbin_boost_ptr = b->zrun_zbin_boost;
+ short *coeff_ptr = b->coeff;
+ short *zbin_ptr = b->zbin;
+ short *round_ptr = b->round;
+ short *quant_ptr = b->quant;
+ short *quant_shift_ptr = b->quant_shift;
+ short *qcoeff_ptr = d->qcoeff;
+ short *dqcoeff_ptr = d->dqcoeff;
+ short *dequant_ptr = d->dequant;
+ short zbin_oq_value = b->zbin_extra;
+
+ memset(qcoeff_ptr, 0, 32);
+ memset(dqcoeff_ptr, 0, 32);
+
+ eob = -1;
+
+ for (i = 0; i < 16; i++)
+ {
+ rc = vp8_default_zig_zag1d[i];
+ z = coeff_ptr[rc];
+
+ zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+
+ zbin_boost_ptr ++;
+ sz = (z >> 31); /* sign of z */
+ x = (z ^ sz) - sz; /* x = abs(z) */
+
+ if (x >= zbin)
+ {
+ x += round_ptr[rc];
+ y = ((((x * quant_ptr[rc]) >> 16) + x)
+ * quant_shift_ptr[rc]) >> 16; /* quantize (x) */
+ x = (y ^ sz) - sz; /* get the sign back */
+ qcoeff_ptr[rc] = x; /* write to destination */
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */
+
+ if (y)
+ {
+ eob = i; /* last nonzero coeffs */
+ zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */
+ }
+ }
+ }
+
+ *d->eob = (char)(eob + 1);
+}
+
+void vp8_quantize_mby(MACROBLOCK *x)
+{
+ int i;
+ int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+ && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
+
+ for (i = 0; i < 16; i++)
+ x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+
+ if(has_2nd_order)
+ x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
+}
+
+void vp8_quantize_mb(MACROBLOCK *x)
+{
+ int i;
+ int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+ && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
+
+ for (i = 0; i < 24+has_2nd_order; i++)
+ x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+}
+
+
+void vp8_quantize_mbuv(MACROBLOCK *x)
+{
+ int i;
+
+ for (i = 16; i < 24; i++)
+ x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+}
+
+static const int qrounding_factors[129] =
+{
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48
+};
+
+
+static const int qzbin_factors[129] =
+{
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80
+};
+
+
+static const int qrounding_factors_y2[129] =
+{
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48
+};
+
+
+static const int qzbin_factors_y2[129] =
+{
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80
+};
+
+
+static void invert_quant(int improved_quant, short *quant,
+ short *shift, short d)
+{
+ if(improved_quant)
+ {
+ unsigned t;
+ int l;
+ t = d;
+ for(l = 0; t > 1; l++)
+ t>>=1;
+ t = 1 + (1<<(16+l))/d;
+ *quant = (short)(t - (1<<16));
+ *shift = l;
+ /* use multiplication and constant shift by 16 */
+ *shift = 1 << (16 - *shift);
+ }
+ else
+ {
+ *quant = (1 << 16) / d;
+ *shift = 0;
+ /* use multiplication and constant shift by 16 */
+ *shift = 1 << (16 - *shift);
+ }
+}
+
+
+void vp8cx_init_quantizer(VP8_COMP *cpi)
+{
+ int i;
+ int quant_val;
+ int Q;
+
+ int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44,
+ 44, 44};
+
+ for (Q = 0; Q < QINDEX_RANGE; Q++)
+ {
+ /* dc values */
+ quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
+ cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
+ invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
+ cpi->Y1quant_shift[Q] + 0, quant_val);
+ cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.Y1dequant[Q][0] = quant_val;
+ cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+ quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
+ cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
+ invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
+ cpi->Y2quant_shift[Q] + 0, quant_val);
+ cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+ cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+ cpi->common.Y2dequant[Q][0] = quant_val;
+ cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+ quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
+ cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
+ invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
+ cpi->UVquant_shift[Q] + 0, quant_val);
+ cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.UVdequant[Q][0] = quant_val;
+ cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+ /* all the ac values */
+ quant_val = vp8_ac_yquant(Q);
+ cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val;
+ invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1,
+ cpi->Y1quant_shift[Q] + 1, quant_val);
+ cpi->Y1zbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->Y1round[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.Y1dequant[Q][1] = quant_val;
+ cpi->zrun_zbin_boost_y1[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+ quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
+ cpi->Y2quant_fast[Q][1] = (1 << 16) / quant_val;
+ invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 1,
+ cpi->Y2quant_shift[Q] + 1, quant_val);
+ cpi->Y2zbin[Q][1] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+ cpi->Y2round[Q][1] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+ cpi->common.Y2dequant[Q][1] = quant_val;
+ cpi->zrun_zbin_boost_y2[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+ quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+ cpi->UVquant_fast[Q][1] = (1 << 16) / quant_val;
+ invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 1,
+ cpi->UVquant_shift[Q] + 1, quant_val);
+ cpi->UVzbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->UVround[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.UVdequant[Q][1] = quant_val;
+ cpi->zrun_zbin_boost_uv[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+ for (i = 2; i < 16; i++)
+ {
+ cpi->Y1quant_fast[Q][i] = cpi->Y1quant_fast[Q][1];
+ cpi->Y1quant[Q][i] = cpi->Y1quant[Q][1];
+ cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];
+ cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];
+ cpi->Y1round[Q][i] = cpi->Y1round[Q][1];
+ cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *
+ zbin_boost[i]) >> 7;
+
+ cpi->Y2quant_fast[Q][i] = cpi->Y2quant_fast[Q][1];
+ cpi->Y2quant[Q][i] = cpi->Y2quant[Q][1];
+ cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];
+ cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];
+ cpi->Y2round[Q][i] = cpi->Y2round[Q][1];
+ cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *
+ zbin_boost[i]) >> 7;
+
+ cpi->UVquant_fast[Q][i] = cpi->UVquant_fast[Q][1];
+ cpi->UVquant[Q][i] = cpi->UVquant[Q][1];
+ cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];
+ cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];
+ cpi->UVround[Q][i] = cpi->UVround[Q][1];
+ cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *
+ zbin_boost[i]) >> 7;
+ }
+ }
+}
+
+#define ZBIN_EXTRA_Y \
+ (( cpi->common.Y1dequant[QIndex][1] * \
+ ( x->zbin_over_quant + \
+ x->zbin_mode_boost + \
+ x->act_zbin_adj ) ) >> 7)
+
+#define ZBIN_EXTRA_UV \
+ (( cpi->common.UVdequant[QIndex][1] * \
+ ( x->zbin_over_quant + \
+ x->zbin_mode_boost + \
+ x->act_zbin_adj ) ) >> 7)
+
+#define ZBIN_EXTRA_Y2 \
+ (( cpi->common.Y2dequant[QIndex][1] * \
+ ( (x->zbin_over_quant / 2) + \
+ x->zbin_mode_boost + \
+ x->act_zbin_adj ) ) >> 7)
+
+void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
+{
+ int i;
+ int QIndex;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int zbin_extra;
+
+ /* Select the baseline MB Q index. */
+ if (xd->segmentation_enabled)
+ {
+ /* Abs Value */
+ if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+ QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
+ /* Delta Value */
+ else
+ {
+ QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
+ /* Clamp to valid range */
+ QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;
+ }
+ }
+ else
+ QIndex = cpi->common.base_qindex;
+
+ /* This initialization should be called at least once. Use ok_to_skip to
+ * decide if it is ok to skip.
+ * Before encoding a frame, this function is always called with ok_to_skip
+ * =0, which means no skipping of calculations. The "last" values are
+ * initialized at that time.
+ */
+ if (!ok_to_skip || QIndex != x->q_index)
+ {
+
+ xd->dequant_y1_dc[0] = 1;
+ xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0];
+ xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0];
+ xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0];
+
+ for (i = 1; i < 16; i++)
+ {
+ xd->dequant_y1_dc[i] =
+ xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1];
+ xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1];
+ xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1];
+ }
+#if 1
+ /*TODO: Remove dequant from BLOCKD. This is a temporary solution until
+ * the quantizer code uses a passed in pointer to the dequant constants.
+ * This will also require modifications to the x86 and neon assembly.
+ * */
+ for (i = 0; i < 16; i++)
+ x->e_mbd.block[i].dequant = xd->dequant_y1;
+ for (i = 16; i < 24; i++)
+ x->e_mbd.block[i].dequant = xd->dequant_uv;
+ x->e_mbd.block[24].dequant = xd->dequant_y2;
+#endif
+
+ /* Y */
+ zbin_extra = ZBIN_EXTRA_Y;
+
+ for (i = 0; i < 16; i++)
+ {
+ x->block[i].quant = cpi->Y1quant[QIndex];
+ x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
+ x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
+ x->block[i].zbin = cpi->Y1zbin[QIndex];
+ x->block[i].round = cpi->Y1round[QIndex];
+ x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
+ x->block[i].zbin_extra = (short)zbin_extra;
+ }
+
+ /* UV */
+ zbin_extra = ZBIN_EXTRA_UV;
+
+ for (i = 16; i < 24; i++)
+ {
+ x->block[i].quant = cpi->UVquant[QIndex];
+ x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
+ x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
+ x->block[i].zbin = cpi->UVzbin[QIndex];
+ x->block[i].round = cpi->UVround[QIndex];
+ x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
+ x->block[i].zbin_extra = (short)zbin_extra;
+ }
+
+ /* Y2 */
+ zbin_extra = ZBIN_EXTRA_Y2;
+
+ x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
+ x->block[24].quant = cpi->Y2quant[QIndex];
+ x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
+ x->block[24].zbin = cpi->Y2zbin[QIndex];
+ x->block[24].round = cpi->Y2round[QIndex];
+ x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
+ x->block[24].zbin_extra = (short)zbin_extra;
+
+ /* save this macroblock QIndex for vp8_update_zbin_extra() */
+ x->q_index = QIndex;
+
+ x->last_zbin_over_quant = x->zbin_over_quant;
+ x->last_zbin_mode_boost = x->zbin_mode_boost;
+ x->last_act_zbin_adj = x->act_zbin_adj;
+
+
+
+ }
+ else if(x->last_zbin_over_quant != x->zbin_over_quant
+ || x->last_zbin_mode_boost != x->zbin_mode_boost
+ || x->last_act_zbin_adj != x->act_zbin_adj)
+ {
+ /* Y */
+ zbin_extra = ZBIN_EXTRA_Y;
+
+ for (i = 0; i < 16; i++)
+ x->block[i].zbin_extra = (short)zbin_extra;
+
+ /* UV */
+ zbin_extra = ZBIN_EXTRA_UV;
+
+ for (i = 16; i < 24; i++)
+ x->block[i].zbin_extra = (short)zbin_extra;
+
+ /* Y2 */
+ zbin_extra = ZBIN_EXTRA_Y2;
+ x->block[24].zbin_extra = (short)zbin_extra;
+
+ x->last_zbin_over_quant = x->zbin_over_quant;
+ x->last_zbin_mode_boost = x->zbin_mode_boost;
+ x->last_act_zbin_adj = x->act_zbin_adj;
+ }
+}
+
+void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
+{
+ int i;
+ int QIndex = x->q_index;
+ int zbin_extra;
+
+ /* Y */
+ zbin_extra = ZBIN_EXTRA_Y;
+
+ for (i = 0; i < 16; i++)
+ x->block[i].zbin_extra = (short)zbin_extra;
+
+ /* UV */
+ zbin_extra = ZBIN_EXTRA_UV;
+
+ for (i = 16; i < 24; i++)
+ x->block[i].zbin_extra = (short)zbin_extra;
+
+ /* Y2 */
+ zbin_extra = ZBIN_EXTRA_Y2;
+ x->block[24].zbin_extra = (short)zbin_extra;
+}
+#undef ZBIN_EXTRA_Y
+#undef ZBIN_EXTRA_UV
+#undef ZBIN_EXTRA_Y2
+
+void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
+{
+ /* Clear Zbin mode boost for default case */
+ cpi->mb.zbin_mode_boost = 0;
+
+ /* MB level quantizer setup */
+ vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0);
+}
+
+
+void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
+{
+ VP8_COMMON *cm = &cpi->common;
+ MACROBLOCKD *mbd = &cpi->mb.e_mbd;
+ int update = 0;
+ int new_delta_q;
+ int new_uv_delta_q;
+ cm->base_qindex = Q;
+
+ /* if any of the delta_q values change, the update flag has to be set */
+ /* currently only y2dc_delta_q and the uv delta_qs may change */
+
+ cm->y1dc_delta_q = 0;
+ cm->y2ac_delta_q = 0;
+
+ if (Q < 4)
+ {
+ new_delta_q = 4-Q;
+ }
+ else
+ new_delta_q = 0;
+
+ update |= cm->y2dc_delta_q != new_delta_q;
+ cm->y2dc_delta_q = new_delta_q;
+
+ new_uv_delta_q = 0;
+ // For screen content, lower the q value for the UV channels. For now,
+ // select a conservative delta: the same delta for dc and ac, decreasing
+ // in magnitude with lower Q, and set to 0 below some threshold. May want
+ // to condition this in the future on the variance/energy in the UV channels.
+ if (cpi->oxcf.screen_content_mode && Q > 40) {
+ new_uv_delta_q = -(int)(0.15 * Q);
+ // Check range: magnitude of delta is 4 bits.
+ if (new_uv_delta_q < -15) {
+ new_uv_delta_q = -15;
+ }
+ }
+ update |= cm->uvdc_delta_q != new_uv_delta_q;
+ cm->uvdc_delta_q = new_uv_delta_q;
+ cm->uvac_delta_q = new_uv_delta_q;
+
+ /* Set Segment specific quantizers */
+ mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];
+ mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1];
+ mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2];
+ mbd->segment_feature_data[MB_LVL_ALT_Q][3] = cpi->segment_feature_data[MB_LVL_ALT_Q][3];
+
+ /* quantizer has to be reinitialized for any delta_q changes */
+ if(update)
+ vp8cx_init_quantizer(cpi);
+
+}
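The scalar fast path moved above is branchless: an arithmetic right shift turns
the coefficient's sign into a 0/-1 mask, xor/subtract computes the absolute
value, and division by the step size becomes a multiply by the Q16 reciprocal
that vp8cx_init_quantizer stores in quant_fast ((1 << 16) / quant_val). A
minimal self-contained sketch of that arithmetic, using made-up round/dequant
values rather than entries from a real Q table:

    #include <stdio.h>

    int main(void)
    {
        int z = -73;                       /* input DCT coefficient */
        int round = 4;                     /* assumed rounding bias */
        int dequant = 8;                   /* assumed step size */
        int quant = (1 << 16) / dequant;   /* Q16 reciprocal, like quant_fast */
        int sz, x, y;

        sz = z >> 31;                      /* sign mask: 0 or -1 */
        x = (z ^ sz) - sz;                 /* abs(z) without a branch */
        y = ((x + round) * quant) >> 16;   /* quantize: |z| / dequant, rounded */
        x = (y ^ sz) - sz;                 /* reapply the sign */

        printf("qcoeff = %d, dqcoeff = %d\n", x, x * dequant); /* -9, -72 */
        return 0;
    }

The eob bookkeeping in the real function simply remembers the last scan
position at which y was nonzero.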
--- a/vp8/encoder/x86/quantize_sse2.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_config.h"
-#include "vp8_rtcd.h"
-#include "vpx_ports/x86.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vp8/encoder/block.h"
-#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */
-
-#include <mmintrin.h> /* MMX */
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#define SELECT_EOB(i, z) \
- do { \
- short boost = *zbin_boost_ptr; \
- int cmp = (x[z] < boost) | (y[z] == 0); \
- zbin_boost_ptr++; \
- if (cmp) \
- break; \
- qcoeff_ptr[z] = y[z]; \
- eob = i; \
- zbin_boost_ptr = b->zrun_zbin_boost; \
- } while (0)
-
-void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
-{
- char eob = 0;
- short *zbin_boost_ptr;
- short *qcoeff_ptr = d->qcoeff;
- DECLARE_ALIGNED(16, short, x[16]);
- DECLARE_ALIGNED(16, short, y[16]);
-
- __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1;
- __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));
- __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));
- __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
- __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));
- __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);
- __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));
- __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));
- __m128i round0 = _mm_load_si128((__m128i *)(b->round));
- __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
- __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));
- __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));
- __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
- __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
-
- memset(qcoeff_ptr, 0, 32);
-
- /* Duplicate to all lanes. */
- zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);
- zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);
-
- /* Sign of z: z >> 15 */
- sz0 = _mm_srai_epi16(z0, 15);
- sz1 = _mm_srai_epi16(z1, 15);
-
- /* x = abs(z): (z ^ sz) - sz */
- x0 = _mm_xor_si128(z0, sz0);
- x1 = _mm_xor_si128(z1, sz1);
- x0 = _mm_sub_epi16(x0, sz0);
- x1 = _mm_sub_epi16(x1, sz1);
-
- /* zbin[] + zbin_extra */
- zbin0 = _mm_add_epi16(zbin0, zbin_extra);
- zbin1 = _mm_add_epi16(zbin1, zbin_extra);
-
- /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance
- * the equation because boost is the only value which can change:
- * x - (zbin[] + extra) >= boost */
- x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);
- x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);
-
- _mm_store_si128((__m128i *)(x), x_minus_zbin0);
- _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1);
-
- /* All the remaining calculations are valid whether they are done now with
- * simd or later inside the loop one at a time. */
- x0 = _mm_add_epi16(x0, round0);
- x1 = _mm_add_epi16(x1, round1);
-
- y0 = _mm_mulhi_epi16(x0, quant0);
- y1 = _mm_mulhi_epi16(x1, quant1);
-
- y0 = _mm_add_epi16(y0, x0);
- y1 = _mm_add_epi16(y1, x1);
-
- /* Instead of shifting each value independently we convert the scaling
- * factor with 1 << (16 - shift) so we can use multiply/return high half. */
- y0 = _mm_mulhi_epi16(y0, quant_shift0);
- y1 = _mm_mulhi_epi16(y1, quant_shift1);
-
- /* Return the sign: (y ^ sz) - sz */
- y0 = _mm_xor_si128(y0, sz0);
- y1 = _mm_xor_si128(y1, sz1);
- y0 = _mm_sub_epi16(y0, sz0);
- y1 = _mm_sub_epi16(y1, sz1);
-
- _mm_store_si128((__m128i *)(y), y0);
- _mm_store_si128((__m128i *)(y + 8), y1);
-
- zbin_boost_ptr = b->zrun_zbin_boost;
-
- /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */
- SELECT_EOB(1, 0);
- SELECT_EOB(2, 1);
- SELECT_EOB(3, 4);
- SELECT_EOB(4, 8);
- SELECT_EOB(5, 5);
- SELECT_EOB(6, 2);
- SELECT_EOB(7, 3);
- SELECT_EOB(8, 6);
- SELECT_EOB(9, 9);
- SELECT_EOB(10, 12);
- SELECT_EOB(11, 13);
- SELECT_EOB(12, 10);
- SELECT_EOB(13, 7);
- SELECT_EOB(14, 11);
- SELECT_EOB(15, 14);
- SELECT_EOB(16, 15);
-
- y0 = _mm_load_si128((__m128i *)(d->qcoeff));
- y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8));
-
- /* dqcoeff = qcoeff * dequant */
- y0 = _mm_mullo_epi16(y0, dequant0);
- y1 = _mm_mullo_epi16(y1, dequant1);
-
- _mm_store_si128((__m128i *)(d->dqcoeff), y0);
- _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1);
-
- *d->eob = eob;
-}
-
-void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
-{
- __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
- __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
- __m128i round0 = _mm_load_si128((__m128i *)(b->round));
- __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
- __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
- __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
- __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
- __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
- __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag));
- __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8));
-
- __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones;
-
- /* sign of z: z >> 15 */
- sz0 = _mm_srai_epi16(z0, 15);
- sz1 = _mm_srai_epi16(z1, 15);
-
- /* x = abs(z): (z ^ sz) - sz */
- x0 = _mm_xor_si128(z0, sz0);
- x1 = _mm_xor_si128(z1, sz1);
- x0 = _mm_sub_epi16(x0, sz0);
- x1 = _mm_sub_epi16(x1, sz1);
-
- /* x += round */
- x0 = _mm_add_epi16(x0, round0);
- x1 = _mm_add_epi16(x1, round1);
-
- /* y = (x * quant) >> 16 */
- y0 = _mm_mulhi_epi16(x0, quant_fast0);
- y1 = _mm_mulhi_epi16(x1, quant_fast1);
-
- /* x = abs(y) = (y ^ sz) - sz */
- y0 = _mm_xor_si128(y0, sz0);
- y1 = _mm_xor_si128(y1, sz1);
- x0 = _mm_sub_epi16(y0, sz0);
- x1 = _mm_sub_epi16(y1, sz1);
-
- /* qcoeff = x */
- _mm_store_si128((__m128i *)(d->qcoeff), x0);
- _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);
-
- /* x * dequant */
- xdq0 = _mm_mullo_epi16(x0, dequant0);
- xdq1 = _mm_mullo_epi16(x1, dequant1);
-
- /* dqcoeff = x * dequant */
- _mm_store_si128((__m128i *)(d->dqcoeff), xdq0);
- _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1);
-
- /* build a mask for the zig zag */
- zeros = _mm_setzero_si128();
-
- x0 = _mm_cmpeq_epi16(x0, zeros);
- x1 = _mm_cmpeq_epi16(x1, zeros);
-
- ones = _mm_cmpeq_epi16(zeros, zeros);
-
- x0 = _mm_xor_si128(x0, ones);
- x1 = _mm_xor_si128(x1, ones);
-
- x0 = _mm_and_si128(x0, inv_zig_zag0);
- x1 = _mm_and_si128(x1, inv_zig_zag1);
-
- x0 = _mm_max_epi16(x0, x1);
-
- /* now down to 8 */
- x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110
-
- x0 = _mm_max_epi16(x0, x1);
-
- /* only 4 left */
- x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110
-
- x0 = _mm_max_epi16(x0, x1);
-
- /* okay, just 2! */
- x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001
-
- x0 = _mm_max_epi16(x0, x1);
-
- *d->eob = 0xFF & _mm_cvtsi128_si32(x0);
-}
--- /dev/null
+++ b/vp8/encoder/x86/vp8_quantize_sse2.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#include "vpx_ports/x86.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp8/encoder/block.h"
+#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */
+
+#include <mmintrin.h> /* MMX */
+#include <xmmintrin.h> /* SSE */
+#include <emmintrin.h> /* SSE2 */
+
+#define SELECT_EOB(i, z) \
+ do { \
+ short boost = *zbin_boost_ptr; \
+ int cmp = (x[z] < boost) | (y[z] == 0); \
+ zbin_boost_ptr++; \
+ if (cmp) \
+ break; \
+ qcoeff_ptr[z] = y[z]; \
+ eob = i; \
+ zbin_boost_ptr = b->zrun_zbin_boost; \
+ } while (0)
+
+void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
+{
+ char eob = 0;
+ short *zbin_boost_ptr;
+ short *qcoeff_ptr = d->qcoeff;
+ DECLARE_ALIGNED(16, short, x[16]);
+ DECLARE_ALIGNED(16, short, y[16]);
+
+ __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1;
+ __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));
+ __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));
+ __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
+ __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));
+ __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);
+ __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));
+ __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));
+ __m128i round0 = _mm_load_si128((__m128i *)(b->round));
+ __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
+ __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));
+ __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));
+ __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
+ __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
+
+ memset(qcoeff_ptr, 0, 32);
+
+ /* Duplicate to all lanes. */
+ zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);
+ zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);
+
+ /* Sign of z: z >> 15 */
+ sz0 = _mm_srai_epi16(z0, 15);
+ sz1 = _mm_srai_epi16(z1, 15);
+
+ /* x = abs(z): (z ^ sz) - sz */
+ x0 = _mm_xor_si128(z0, sz0);
+ x1 = _mm_xor_si128(z1, sz1);
+ x0 = _mm_sub_epi16(x0, sz0);
+ x1 = _mm_sub_epi16(x1, sz1);
+
+ /* zbin[] + zbin_extra */
+ zbin0 = _mm_add_epi16(zbin0, zbin_extra);
+ zbin1 = _mm_add_epi16(zbin1, zbin_extra);
+
+ /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance
+ * the equation because boost is the only value which can change:
+ * x - (zbin[] + extra) >= boost */
+ x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);
+ x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);
+
+ _mm_store_si128((__m128i *)(x), x_minus_zbin0);
+ _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1);
+
+ /* All the remaining calculations are valid whether they are done now with
+ * simd or later inside the loop one at a time. */
+ x0 = _mm_add_epi16(x0, round0);
+ x1 = _mm_add_epi16(x1, round1);
+
+ y0 = _mm_mulhi_epi16(x0, quant0);
+ y1 = _mm_mulhi_epi16(x1, quant1);
+
+ y0 = _mm_add_epi16(y0, x0);
+ y1 = _mm_add_epi16(y1, x1);
+
+ /* Instead of shifting each value independently we convert the scaling
+ * factor with 1 << (16 - shift) so we can use multiply/return high half. */
+ y0 = _mm_mulhi_epi16(y0, quant_shift0);
+ y1 = _mm_mulhi_epi16(y1, quant_shift1);
+
+ /* Return the sign: (y ^ sz) - sz */
+ y0 = _mm_xor_si128(y0, sz0);
+ y1 = _mm_xor_si128(y1, sz1);
+ y0 = _mm_sub_epi16(y0, sz0);
+ y1 = _mm_sub_epi16(y1, sz1);
+
+ _mm_store_si128((__m128i *)(y), y0);
+ _mm_store_si128((__m128i *)(y + 8), y1);
+
+ zbin_boost_ptr = b->zrun_zbin_boost;
+
+ /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */
+ SELECT_EOB(1, 0);
+ SELECT_EOB(2, 1);
+ SELECT_EOB(3, 4);
+ SELECT_EOB(4, 8);
+ SELECT_EOB(5, 5);
+ SELECT_EOB(6, 2);
+ SELECT_EOB(7, 3);
+ SELECT_EOB(8, 6);
+ SELECT_EOB(9, 9);
+ SELECT_EOB(10, 12);
+ SELECT_EOB(11, 13);
+ SELECT_EOB(12, 10);
+ SELECT_EOB(13, 7);
+ SELECT_EOB(14, 11);
+ SELECT_EOB(15, 14);
+ SELECT_EOB(16, 15);
+
+ y0 = _mm_load_si128((__m128i *)(d->qcoeff));
+ y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8));
+
+ /* dqcoeff = qcoeff * dequant */
+ y0 = _mm_mullo_epi16(y0, dequant0);
+ y1 = _mm_mullo_epi16(y1, dequant1);
+
+ _mm_store_si128((__m128i *)(d->dqcoeff), y0);
+ _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1);
+
+ *d->eob = eob;
+}
+
+void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
+{
+ __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
+ __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
+ __m128i round0 = _mm_load_si128((__m128i *)(b->round));
+ __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
+ __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
+ __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
+ __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
+ __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
+ __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag));
+ __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8));
+
+ __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones;
+
+ /* sign of z: z >> 15 */
+ sz0 = _mm_srai_epi16(z0, 15);
+ sz1 = _mm_srai_epi16(z1, 15);
+
+ /* x = abs(z): (z ^ sz) - sz */
+ x0 = _mm_xor_si128(z0, sz0);
+ x1 = _mm_xor_si128(z1, sz1);
+ x0 = _mm_sub_epi16(x0, sz0);
+ x1 = _mm_sub_epi16(x1, sz1);
+
+ /* x += round */
+ x0 = _mm_add_epi16(x0, round0);
+ x1 = _mm_add_epi16(x1, round1);
+
+ /* y = (x * quant) >> 16 */
+ y0 = _mm_mulhi_epi16(x0, quant_fast0);
+ y1 = _mm_mulhi_epi16(x1, quant_fast1);
+
+ /* x = abs(y) = (y ^ sz) - sz */
+ y0 = _mm_xor_si128(y0, sz0);
+ y1 = _mm_xor_si128(y1, sz1);
+ x0 = _mm_sub_epi16(y0, sz0);
+ x1 = _mm_sub_epi16(y1, sz1);
+
+ /* qcoeff = x */
+ _mm_store_si128((__m128i *)(d->qcoeff), x0);
+ _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);
+
+ /* x * dequant */
+ xdq0 = _mm_mullo_epi16(x0, dequant0);
+ xdq1 = _mm_mullo_epi16(x1, dequant1);
+
+ /* dqcoeff = x * dequant */
+ _mm_store_si128((__m128i *)(d->dqcoeff), xdq0);
+ _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1);
+
+ /* build a mask for the zig zag */
+ zeros = _mm_setzero_si128();
+
+ x0 = _mm_cmpeq_epi16(x0, zeros);
+ x1 = _mm_cmpeq_epi16(x1, zeros);
+
+ ones = _mm_cmpeq_epi16(zeros, zeros);
+
+ x0 = _mm_xor_si128(x0, ones);
+ x1 = _mm_xor_si128(x1, ones);
+
+ x0 = _mm_and_si128(x0, inv_zig_zag0);
+ x1 = _mm_and_si128(x1, inv_zig_zag1);
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ /* now down to 8 */
+ x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ /* only 4 left */
+ x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ /* okay, just 2! */
+ x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ *d->eob = 0xFF & _mm_cvtsi128_si32(x0);
+}
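The SIMD version avoids a scan loop for the eob entirely: it builds a nonzero
mask over the 16 quantized coefficients, ANDs it with vp8_default_inv_zig_zag
(the 1-based scan position of each raster-order coefficient), and reduces with
_mm_max_epi16 plus shuffles. A scalar sketch of the same trick; the table below
is derived from the standard VP8 zig-zag scan, but treat it as illustrative:

    #include <stdio.h>

    /* 1-based position of raster index i in zig-zag scan order. */
    static const short inv_zig_zag[16] = {
        1, 2, 6,  7,  3,  5,  8, 13,
        4, 9, 12, 14, 10, 11, 15, 16
    };

    static int eob_from_qcoeff(const short *qcoeff)
    {
        int i, eob = 0;
        for (i = 0; i < 16; i++) {
            if (qcoeff[i] != 0 && inv_zig_zag[i] > eob)
                eob = inv_zig_zag[i];   /* SIMD does this as mask + max */
        }
        return eob;                     /* 0 means all coefficients are zero */
    }

    int main(void)
    {
        short q[16] = { 0 };
        q[0] = 3;                       /* DC, scan position 1 */
        q[4] = -1;                      /* raster index 4 is scan position 3 */
        printf("eob = %d\n", eob_from_qcoeff(q));   /* prints 3 */
        return 0;
    }

Because the positions are 1-based, the maximum is directly the count the rest
of the encoder expects, matching the eob + 1 stored by the C versions.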
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -60,7 +60,7 @@
VP8_CX_SRCS-yes += encoder/onyx_if.c
VP8_CX_SRCS-yes += encoder/pickinter.c
VP8_CX_SRCS-yes += encoder/picklpf.c
-VP8_CX_SRCS-yes += encoder/quantize.c
+VP8_CX_SRCS-yes += encoder/vp8_quantize.c
VP8_CX_SRCS-yes += encoder/ratectrl.c
VP8_CX_SRCS-yes += encoder/rdopt.c
VP8_CX_SRCS-yes += encoder/segmentation.c
@@ -84,7 +84,7 @@
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
-VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_quantize_sse2.c
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.c
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.c
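One more piece worth annotating: the improved_quant branch of invert_quant in
the moved vp8_quantize.c stores 1/d as a Q(16+l) reciprocal with its implicit
leading bit dropped, and pre-converts the residual shift into a multiplier, so
vp8_regular_quantize_b (C and SSE2 alike) can divide by the step size with two
multiply-keep-high-half operations. A small self-checking sketch of that math,
with d = 20 as an arbitrary step size:

    #include <stdio.h>

    static void invert_quant_sketch(short *quant, short *shift, short d)
    {
        unsigned t = d;
        int l;
        for (l = 0; t > 1; l++)
            t >>= 1;                      /* l = floor(log2(d)) */
        t = 1 + (1 << (16 + l)) / d;      /* reciprocal of d in Q(16+l) */
        *quant = (short)(t - (1 << 16));  /* drop the implicit leading one */
        *shift = 1 << (16 - l);           /* shift folded into a multiplier */
    }

    int main(void)
    {
        short quant, shift;
        int x = 1234;
        int y;

        invert_quant_sketch(&quant, &shift, 20);
        /* Same sequence as vp8_regular_quantize_b_c's quantize step: */
        y = ((((x * quant) >> 16) + x) * shift) >> 16;
        printf("y = %d, x / d = %d\n", y, 1234 / 20);  /* both print 61 */
        return 0;
    }

This is what lets the SSE2 code use _mm_mulhi_epi16 for both stages instead of
a per-lane variable shift, as its own comment about "multiply/return high
half" notes.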