ref: 8a4336ed2edea09b67f49828df1f8c526a85a7a6
parent: 1a7bf0d1f9d299fe76aa5cf9b6a448c279234150
author: Scott LaVarnway <[email protected]>
date: Mon Dec 18 01:31:46 EST 2017
Add vp9_quantize_fp_nz_c() -- 2 This c version uses the shortcuts found in the x86 vp9_quantize_fp functions. The test was updated to use the correct quant/round range. Change-Id: Ie5871f710d9eb39047d8d9f48b907c0633e1f830
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -14,9 +14,9 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
-#include "./vp9_rtcd.h"
#include "test/acm_random.h"
#include "test/buffer.h"
#include "test/clear_system_state.h"
@@ -42,7 +42,7 @@
uint16_t *eob, const int16_t *scan,
const int16_t *iscan);
typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
- int /*max_size*/>
+ int /*max_size*/, bool /*is_fp*/>
QuantizeParam;
// Wrapper for FP version which does not use zbin or quant_shift.
@@ -69,11 +69,15 @@
class VP9QuantizeBase {
public:
- VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size)
- : bit_depth_(bit_depth), max_size_(max_size) {
+ VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
+ : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
max_value_ = (1 << bit_depth_) - 1;
zbin_ptr_ =
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
+ round_fp_ptr_ = reinterpret_cast<int16_t *>(
+ vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
+ quant_fp_ptr_ = reinterpret_cast<int16_t *>(
+ vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
round_ptr_ =
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_)));
quant_ptr_ =
@@ -86,11 +90,15 @@
~VP9QuantizeBase() {
vpx_free(zbin_ptr_);
+ vpx_free(round_fp_ptr_);
+ vpx_free(quant_fp_ptr_);
vpx_free(round_ptr_);
vpx_free(quant_ptr_);
vpx_free(quant_shift_ptr_);
vpx_free(dequant_ptr_);
zbin_ptr_ = NULL;
+ round_fp_ptr_ = NULL;
+ quant_fp_ptr_ = NULL;
round_ptr_ = NULL;
quant_ptr_ = NULL;
quant_shift_ptr_ = NULL;
@@ -100,6 +108,8 @@
protected:
int16_t *zbin_ptr_;
+ int16_t *round_fp_ptr_;
+ int16_t *quant_fp_ptr_;
int16_t *round_ptr_;
int16_t *quant_ptr_;
int16_t *quant_shift_ptr_;
@@ -107,6 +117,7 @@
const vpx_bit_depth_t bit_depth_;
int max_value_;
const int max_size_;
+ const bool is_fp_;
};
class VP9QuantizeTest : public VP9QuantizeBase,
@@ -113,8 +124,8 @@
public ::testing::TestWithParam<QuantizeParam> {
public:
VP9QuantizeTest()
- : VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3)), quantize_op_(GET_PARAM(0)),
- ref_quantize_op_(GET_PARAM(1)) {}
+ : VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),
+ quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
protected:
const QuantizeFunc quantize_op_;
@@ -121,15 +132,88 @@
const QuantizeFunc ref_quantize_op_;
};
+// This quantizer compares the AC coefficients to the quantization step size to
+// determine if further multiplication operations are needed.
+// Based on vp9_quantize_fp_sse2().
+void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
+ int i, eob = -1;
+ const int thr = dequant_ptr[1] >> 1;
+ (void)iscan;
+ (void)skip_block;
+ assert(!skip_block);
+
+ // Quantization pass: Coefficients are processed in rows of 16. A row after
+ // the first is zeroed when all of its magnitudes are <= thr.
+ for (i = 0; i < n_coeffs; i += 16) {
+ int y;
+ int nzflag_cnt = 0;
+ int abs_coeff[16];
+ int coeff_sign[16];
+
+ // Count the coefficients in this row (16 tran_low_t) that are <= thr.
+ for (y = 0; y < 16; ++y) {
+ const int rc = i + y;
+ const int coeff = coeff_ptr[rc];
+ coeff_sign[y] = (coeff >> 31);
+ abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
+ // The first 16 are skipped in the sse2 code. Do the same here to match.
+ if (i >= 16 && (abs_coeff[y] <= thr)) {
+ nzflag_cnt++;
+ }
+ }
+
+ for (y = 0; y < 16; ++y) {
+ const int rc = i + y;
+ // If all of the AC coeffs in a row have magnitude less than or equal to
+ // the quantization step_size/2, quantize to zero.
+ if (nzflag_cnt < 16) {
+ int tmp =
+ clamp(abs_coeff[y] + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = (tmp * quant_ptr[rc != 0]) >> 16;
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+ } else {
+ qcoeff_ptr[rc] = 0;
+ dqcoeff_ptr[rc] = 0;
+ }
+ }
+ }
+
+ // Scan for eob.
+ for (i = 0; i < n_coeffs; i++) {
+ // Use the scan order to find the correct eob.
+ const int rc = scan[i];
+ if (qcoeff_ptr[rc]) {
+ eob = i;
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
int16_t *quant, int16_t *quant_shift,
- int16_t *dequant) {
+ int16_t *dequant, int16_t *round_fp,
+ int16_t *quant_fp) {
+ // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
+ const int max_qrounding_factor_fp = 64;
+
for (int j = 0; j < 2; j++) {
+ // The range is 4 to 1828 in the VP9 tables.
+ const int qlookup = rnd->RandRange(1825) + 4;
+ round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
+ quant_fp[j] = (1 << 16) / qlookup;
+
// Values determined by deconstructing vp9_init_quantizer().
// zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
// values or U/V values of any bit depth. This is because y_delta is not
// factored into the vp9_ac_quant() call.
zbin[j] = rnd->RandRange(1200);
+
// round may be up to 685 for Y values or 914 for U/V.
round[j] = rnd->RandRange(914);
// quant ranges from 1 to -32703
@@ -141,6 +225,8 @@
}
for (int j = 2; j < 8; j++) {
zbin[j] = zbin[1];
+ round_fp[j] = round_fp[1];
+ quant_fp[j] = quant_fp[1];
round[j] = round[1];
quant[j] = quant[1];
quant_shift[j] = quant_shift[1];
@@ -179,20 +265,20 @@
const int count = (4 << sz) * (4 << sz);
coeff.Set(&rnd, -max_value_, max_value_);
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
- quant_shift_ptr_, dequant_ptr_);
+ quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+ quant_fp_ptr_);
+ int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
+ int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
+ ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
+ q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
+ ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
+ scan_order->scan, scan_order->iscan);
- ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
- round_ptr_, quant_ptr_, quant_shift_ptr_,
- ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
- dequant_ptr_, &ref_eob, scan_order->scan,
- scan_order->iscan);
+ ASM_REGISTER_STATE_CHECK(quantize_op_(
+ coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
+ quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
+ dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
- ASM_REGISTER_STATE_CHECK(
- quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
- round_ptr_, quant_ptr_, quant_shift_ptr_,
- qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
- dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
-
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -241,20 +327,20 @@
coeff.TopLeftPixel()[rnd(count)] =
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
- quant_shift_ptr_, dequant_ptr_);
+ quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+ quant_fp_ptr_);
+ int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
+ int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
+ ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
+ q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
+ ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
+ scan_order->scan, scan_order->iscan);
- ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
- round_ptr_, quant_ptr_, quant_shift_ptr_,
- ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
- dequant_ptr_, &ref_eob, scan_order->scan,
- scan_order->iscan);
+ ASM_REGISTER_STATE_CHECK(quantize_op_(
+ coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
+ quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
+ dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
- ASM_REGISTER_STATE_CHECK(
- quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
- round_ptr_, quant_ptr_, quant_shift_ptr_,
- qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
- dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
-
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -299,7 +385,10 @@
const int count = (4 << sz) * (4 << sz);
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
- quant_shift_ptr_, dequant_ptr_);
+ quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+ quant_fp_ptr_);
+ int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
+ int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
if (i == 0) {
// When |coeff values| are less than zbin the results are 0.
@@ -319,10 +408,10 @@
vpx_usec_timer timer;
vpx_usec_timer_start(&timer);
for (int j = 0; j < 100000000 / count; ++j) {
- quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
- round_ptr_, quant_ptr_, quant_shift_ptr_,
- qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
- dequant_ptr_, &eob, scan_order->scan, scan_order->iscan);
+ quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
+ q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
+ dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
+ scan_order->scan, scan_order->iscan);
}
vpx_usec_timer_mark(&timer);
const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
@@ -345,37 +434,44 @@
SSE2, VP9QuantizeTest,
::testing::Values(
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
- VPX_BITS_8, 16),
+ VPX_BITS_8, 16, false),
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
- VPX_BITS_10, 16),
+ VPX_BITS_10, 16, false),
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
- VPX_BITS_12, 16),
+ VPX_BITS_12, 16, false),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
- &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32),
+ &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
- &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32),
+ &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
- &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32)));
+ &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));
#else
-INSTANTIATE_TEST_CASE_P(SSE2, VP9QuantizeTest,
- ::testing::Values(make_tuple(&vpx_quantize_b_sse2,
- &vpx_quantize_b_c,
- VPX_BITS_8, 16)));
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
INSTANTIATE_TEST_CASE_P(
- DISABLED_SSE2, VP9QuantizeTest,
- ::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
- &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
- 16)));
+ SSE2, VP9QuantizeTest,
+ ::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
+ VPX_BITS_8, 16, false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
+ 16, true)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
+#if ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(
+ SSSE3, VP9QuantizeTest,
+ ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
+ VPX_BITS_8, 16, false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
+ 16, true)));
+#else
INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
&vpx_quantize_b_c,
- VPX_BITS_8, 16)));
+ VPX_BITS_8, 16, false)));
+#endif
#if ARCH_X86_64
// TODO(johannkoenig): SSSE3 optimizations do not yet pass this test.
@@ -382,13 +478,11 @@
INSTANTIATE_TEST_CASE_P(
DISABLED_SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
- &vpx_quantize_b_32x32_c, VPX_BITS_8, 32),
- make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
- &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
- 16),
+ &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
+ false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
- VPX_BITS_8, 32)));
+ VPX_BITS_8, 32, true)));
#endif // ARCH_X86_64
#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
@@ -398,11 +492,12 @@
INSTANTIATE_TEST_CASE_P(
AVX, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
- VPX_BITS_8, 16),
+ VPX_BITS_8, 16, false),
// Even though SSSE3 and AVX do not match the reference
// code, we can keep them in sync with each other.
make_tuple(&vpx_quantize_b_32x32_avx,
- &vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32)));
+ &vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32,
+ false)));
#endif // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
// TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
@@ -409,14 +504,17 @@
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
NEON, VP9QuantizeTest,
- ::testing::Values(
- make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16),
- make_tuple(&vpx_quantize_b_32x32_neon, &vpx_quantize_b_32x32_c,
- VPX_BITS_8, 32),
- make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
- &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
- make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
- &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
+ ::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
+ VPX_BITS_8, 16, false),
+ make_tuple(&vpx_quantize_b_32x32_neon,
+ &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
+ false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
+ &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
+ 16, true),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
+ &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
+ VPX_BITS_8, 32, true)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
// Only useful to compare "Speed" test results.
@@ -423,11 +521,14 @@
INSTANTIATE_TEST_CASE_P(
DISABLED_C, VP9QuantizeTest,
::testing::Values(
- make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16),
+ make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false),
make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8,
- 32),
+ 32, false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
- &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
+ &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
+ make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
- &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
+ &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
+ true)));
} // namespace