shithub: libvpx

--- a/test/vp9_quantize_test.cc

+++ b/test/vp9_quantize_test.cc

@@ -496,7 +496,6 @@

 #endif  // HAVE_SSE2

 #if HAVE_SSSE3

-#if CONFIG_VP9_HIGHBITDEPTH

 #if ARCH_X86_64

 INSTANTIATE_TEST_CASE_P(

     SSSE3, VP9QuantizeTest,

@@ -521,35 +520,9 @@

                                  false)));

 #endif  // ARCH_X86_64

-#else

-#if ARCH_X86_64

-INSTANTIATE_TEST_CASE_P(

-    SSSE3, VP9QuantizeTest,

-    ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,

-                                 VPX_BITS_8, 16, false),

-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,

-                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,

-                                 16, true),

-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,

-                                 &QuantFPWrapper<quantize_fp_32x32_nz_c>,

-                                 VPX_BITS_8, 32, true)));

-#else

-INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,

-                        ::testing::Values(make_tuple(&vpx_quantize_b_ssse3,

-                                                     &vpx_quantize_b_c,

-                                                     VPX_BITS_8, 16, false)));

-#endif  // ARCH_X86_64

-// TODO(webm:1448): lowbd truncates results in C.

-INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest,

-                        ::testing::Values(make_tuple(

-                            &vpx_quantize_b_32x32_ssse3,

-                            &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false)));

-#endif  // CONFIG_VP9_HIGHBITDEPTH

 #endif  // HAVE_SSSE3

 #if HAVE_AVX

-#if CONFIG_VP9_HIGHBITDEPTH

 INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,

                         ::testing::Values(make_tuple(&vpx_quantize_b_avx,

                                                      &vpx_quantize_b_c,

@@ -557,17 +530,6 @@

                                           make_tuple(&vpx_quantize_b_32x32_avx,

                                                      &vpx_quantize_b_32x32_c,

                                                      VPX_BITS_8, 32, false)));

-#else

-INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,

-                        ::testing::Values(make_tuple(&vpx_quantize_b_avx,

-                                                     &vpx_quantize_b_c,

-                                                     VPX_BITS_8, 16, false)));

-// TODO(webm:1448): lowbd truncates results in C.

-INSTANTIATE_TEST_CASE_P(DISABLED_AVX, VP9QuantizeTest,

-                        ::testing::Values(make_tuple(&vpx_quantize_b_32x32_avx,

-                                                     &vpx_quantize_b_32x32_c,

-                                                     VPX_BITS_8, 32, false)));

-#endif  // CONFIG_VP9_HIGHBITDEPTH

 #endif  // HAVE_AVX

 #if ARCH_X86_64 && HAVE_AVX2

@@ -576,7 +538,7 @@

     ::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,

                                  &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,

                                  16, true)));

-#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH

+#endif  // ARCH_X86_64 && HAVE_AVX2

 // TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.

 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH

--- a/vpx_dsp/quantize.c

+++ b/vpx_dsp/quantize.c

@@ -12,6 +12,7 @@

 #include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/quantize.h"

+#include "vpx_dsp/vpx_dsp_common.h"

 #include "vpx_mem/vpx_mem.h"

 void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,

@@ -259,7 +260,15 @@

           15;

     qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;

+#if (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH

+    // When tran_low_t is only 16 bits, dqcoeff can exceed its range. Rather

+    // than truncating with a cast, saturate the value. This is easier to

+    // implement on x86 and preserves the sign of the value.

+    dqcoeff_ptr[rc] =

+        clamp(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2, INT16_MIN, INT16_MAX);

+#else

     dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;

+#endif  // (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH

     if (tmp) eob = idx_arr[i];

--- a/vpx_dsp/x86/quantize_ssse3.h

+++ b/vpx_dsp/x86/quantize_ssse3.h

@@ -24,7 +24,6 @@

   // Un-sign to bias rounding like C.

   const __m128i coeff = _mm_abs_epi16(qcoeff);

-#if CONFIG_VP9_HIGHBITDEPTH

   const __m128i sign_0 = _mm_unpacklo_epi16(zero, qcoeff);

   const __m128i sign_1 = _mm_unpackhi_epi16(zero, qcoeff);

@@ -40,17 +39,12 @@

   dqcoeff32_0 = _mm_sign_epi32(dqcoeff32_0, sign_0);

   dqcoeff32_1 = _mm_sign_epi32(dqcoeff32_1, sign_1);

+#if CONFIG_VP9_HIGHBITDEPTH

   _mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0);

   _mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1);

 #else

-  __m128i dqcoeff16 = _mm_mullo_epi16(coeff, dequant);

-  (void)zero;

-  dqcoeff16 = _mm_srli_epi16(dqcoeff16, 1);

-  dqcoeff16 = _mm_sign_epi16(dqcoeff16, qcoeff);

-  _mm_store_si128((__m128i *)(dqcoeff), dqcoeff16);

+  _mm_store_si128((__m128i *)(dqcoeff),

+                  _mm_packs_epi32(dqcoeff32_0, dqcoeff32_1));

 #endif  // CONFIG_VP9_HIGHBITDEPTH