shithub: libvpx

Download patch

ref: 48f5886605e95aa863e236a68602a1c4d6ced7fc
parent: c4e5c54d69920c07f5d421ba805da1a4c9c3e82d
author: Linfeng Zhang <[email protected]>
date: Wed Mar 8 05:46:33 EST 2017

Add vpx_highbd_idct32x32_135_add_c()

When eob is less than or equal to 135 for high-bitdepth 32x32 idct,
call this function.

BUG=webm:1301

Change-Id: I8a5864f5c076e449c984e602946547a7b09c9fe6

--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -298,6 +298,15 @@
       &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 12, 2),
   make_tuple(
       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_135_add_c>, TX_32X32, 135, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_135_add_c>, TX_32X32, 135, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_135_add_c>, TX_32X32, 135, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
       &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 8, 2),
   make_tuple(
       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -363,6 +363,8 @@
     vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
   } else if (eob <= 34) {
     vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
+  } else if (eob <= 135) {
+    vpx_highbd_idct32x32_135_add(input, dest, stride, bd);
   } else {
     vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
   }
--- a/vpx_dsp/inv_txfm.c
+++ b/vpx_dsp/inv_txfm.c
@@ -2569,6 +2569,35 @@
   }
 }
 
+void vpx_highbd_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest8,
+                                    int stride, int bd) {
+  int i, j;
+  tran_low_t out[32 * 32] = { 0 };
+  tran_low_t *outptr = out;
+  tran_low_t temp_in[32], temp_out[32];
+  uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // Rows
+  // Only upper-left 16x16 has non-zero coeff
+  for (i = 0; i < 16; ++i) {
+    highbd_idct32_c(input, outptr, bd);
+    input += 32;
+    outptr += 32;
+  }
+
+  // Columns
+  for (i = 0; i < 32; ++i) {
+    uint16_t *destT = dest;
+    for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i];
+    highbd_idct32_c(temp_in, temp_out, bd);
+    for (j = 0; j < 32; ++j) {
+      destT[i] = highbd_clip_pixel_add(destT[i],
+                                       ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+      destT += stride;
+    }
+  }
+}
+
 void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
                                    int stride, int bd) {
   int i, j;
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -628,6 +628,8 @@
 
   add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
 
+  add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+
   add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
 
   add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";