shithub: libvpx

Download patch

ref: a160d72522fd8572943966e96b65dd232fe048ea
parent: 10c7876a8e5a638c89eb06913e7c44a756e6d35d
author: Deb Mukherjee <[email protected]>
date: Tue Sep 30 17:56:33 EDT 2014

High-bitdepth bugfixes

Miscellaneous bug-fixes for high bitdepth functionality.
With this patch, high bit-depth profiles become mostly functional,
except for an intermittent assert failure issue that is being
tracked.

Change-Id: I6a7fcbdcf1e5b09842e88535f8442d2e1230748c

--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -469,7 +469,7 @@
   uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};
   const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};
 
-  for (i = 0; i < MAX_MB_PLANE; ++i)
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
 #if CONFIG_VP9_HIGHBITDEPTH
     assert((src->flags & YV12_FLAG_HIGHBITDEPTH) ==
            (dst->flags & YV12_FLAG_HIGHBITDEPTH));
@@ -488,6 +488,7 @@
                                   src_strides[i], dst_strides[i],
                                   src_heights[i], src_widths[i], ppl);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+  }
 }
 
 void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1802,11 +1802,8 @@
   # Structured Similarity (SSIM)
   #
   if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
-    add_proto qw/void vp9_high_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
-    specialize qw/vp9_high_ssim_parms_8x8/;
-
-    add_proto qw/void vp9_high_ssim_parms_8x8_shift/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr, unsigned int bd, unsigned int shift";
-    specialize qw/vp9_high_ssim_parms_8x8_shift/;
+    add_proto qw/void vp9_highbd_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+    specialize qw/vp9_highbd_ssim_parms_8x8/;
   }
 
   # fdct functions
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1712,15 +1712,16 @@
                              - cpi->first_time_stamp_ever) / 10000000.000;
       double total_encode_time = (cpi->time_receive_data +
                                   cpi->time_compress_data)   / 1000.000;
-      double dr = (double)cpi->bytes * (double) 8 / (double)1000
-                  / time_encoded;
+      const double dr =
+          (double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;
+      const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
 
       if (cpi->b_calculate_psnr) {
         const double total_psnr =
-            vpx_sse_to_psnr((double)cpi->total_samples, 255.0,
+            vpx_sse_to_psnr((double)cpi->total_samples, peak,
                             (double)cpi->total_sq_error);
         const double totalp_psnr =
-            vpx_sse_to_psnr((double)cpi->totalp_samples, 255.0,
+            vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
                             (double)cpi->totalp_sq_error);
         const double total_ssim = 100 * pow(cpi->summed_quality /
                                                 cpi->summed_weights, 8.0);
@@ -1914,6 +1915,7 @@
 
 static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
                       PSNR_STATS *psnr) {
+  static const double peak = 255.0;
   const int widths[3]        = {a->y_width,  a->uv_width,  a->uv_width };
   const int heights[3]       = {a->y_height, a->uv_height, a->uv_height};
   const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer,  a->v_buffer };
@@ -1933,7 +1935,7 @@
                                  w, h);
     psnr->sse[1 + i] = sse;
     psnr->samples[1 + i] = samples;
-    psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse);
+    psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
 
     total_sse += sse;
     total_samples += samples;
@@ -1941,7 +1943,7 @@
 
   psnr->sse[0] = total_sse;
   psnr->samples[0] = total_samples;
-  psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, 255.0,
+  psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
                                   (double)total_sse);
 }
 
@@ -3699,7 +3701,7 @@
           vp9_clear_system_state();
 
 #if CONFIG_VP9_HIGHBITDEPTH
-          calc_highbd_psnr(orig, recon, &psnr, cpi->mb.e_mbd.bd,
+          calc_highbd_psnr(orig, pp, &psnr, cpi->mb.e_mbd.bd,
                            cpi->oxcf.input_bit_depth);
 #else
           calc_psnr(orig, pp, &psnr2);
@@ -3714,11 +3716,9 @@
 
 #if CONFIG_VP9_HIGHBITDEPTH
           if (cm->use_highbitdepth) {
-            frame_ssim2 = vp9_highbd_calc_ssim(
-                orig, recon, &weight, xd->bd,
-                xd->bd - cpi->oxcf.input_bit_depth);
+            frame_ssim2 = vp9_highbd_calc_ssim(orig, recon, &weight, xd->bd);
           } else {
-            frame_ssim2 = vp9_calc_ssim(orig, recon, 1, &weight);
+            frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
           }
 #else
           frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
@@ -3730,8 +3730,7 @@
 #if CONFIG_VP9_HIGHBITDEPTH
           if (cm->use_highbitdepth) {
             frame_ssim2 = vp9_highbd_calc_ssim(
-                orig, &cm->post_proc_buffer, &weight,
-                xd->bd, xd->bd - cpi->oxcf.input_bit_depth);
+                orig, &cm->post_proc_buffer, &weight, xd->bd);
           } else {
             frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight);
           }
@@ -3757,10 +3756,9 @@
       if (cpi->b_calculate_ssimg) {
         double y, u, v, frame_all;
 #if CONFIG_VP9_HIGHBITDEPTH
-        if (cm->use_high) {
+        if (cm->use_highbitdepth) {
           frame_all = vp9_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y,
-                                            &u, &v, xd->bd,
-                                            xd->bd - cpi->oxcf.input_bit_depth);
+                                            &u, &v, xd->bd);
         } else {
           frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u,
                                      &v);
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -246,11 +246,11 @@
     } else {
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-        vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
                                      pd->dequant[1] >> (xd->bd - 5),
                                      &rate, &dist);
       } else {
-        vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
                                      pd->dequant[1] >> 3, &rate, &dist);
       }
 #else
--- a/vp9/encoder/vp9_ssim.c
+++ b/vp9/encoder/vp9_ssim.c
@@ -59,26 +59,6 @@
     }
   }
 }
-
-void vp9_highbd_ssim_parms_8x8_shift_c(uint16_t *s, int sp, uint16_t *r, int rp,
-                                       uint32_t *sum_s, uint32_t *sum_r,
-                                       uint32_t *sum_sq_s, uint32_t *sum_sq_r,
-                                       uint32_t *sum_sxr, unsigned int bd,
-                                       unsigned int shift) {
-  int i, j;
-  const int max_val = (1 << bd) - 1;
-  for (i = 0; i < 8; i++, s += sp, r += rp) {
-    for (j = 0; j < 8; j++) {
-      int sj = s[j];
-      int rj = r[j];
-      *sum_s += sj;
-      *sum_r += rj;
-      *sum_sq_s += sj * sj;
-      *sum_sq_r += rj * rj;
-      *sum_sxr += sj * rj;
-    }
-  }
-}
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 static const int64_t cc1 =  26634;  // (64^2*(.01*255)^2
@@ -112,25 +92,10 @@
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-static double high_ssim_8x8_shift(uint16_t *s, int sp, uint16_t *r, int rp,
-                                  unsigned int bd, unsigned int shift) {
+static double highbd_ssim_8x8(uint16_t *s, int sp, uint16_t *r, int rp,
+                              unsigned int bd) {
   uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
   const int oshift = bd - 8;
-  vp9_highbd_ssim_parms_8x8_shift(s, sp, r, rp, &sum_s,
-                                  &sum_r, &sum_sq_s, &sum_sq_r,
-                                  &sum_sxr, bd, shift);
-  return similarity(sum_s >> oshift,
-                    sum_r >> oshift,
-                    sum_sq_s >> (2 * oshift),
-                    sum_sq_r >> (2 * oshift),
-                    sum_sxr >> (2 * oshift),
-                    64);
-}
-
-static double high_ssim_8x8(uint16_t *s, int sp, uint16_t *r, int rp,
-                            unsigned int bd) {
-  uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
-  const int oshift = bd - 8;
   vp9_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
                             &sum_sxr);
   return similarity(sum_s >> oshift,
@@ -167,36 +132,20 @@
 #if CONFIG_VP9_HIGHBITDEPTH
 double vp9_highbd_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
                         int stride_img2, int width, int height,
-                        unsigned int bd, unsigned int shift) {
+                        unsigned int bd) {
   int i, j;
   int samples = 0;
   double ssim_total = 0;
 
-  if (shift) {
-    // sample point start with each 4x4 location
-    for (i = 0; i <= height - 8;
-         i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
-      for (j = 0; j <= width - 8; j += 4) {
-        double v = high_ssim_8x8_shift(CONVERT_TO_SHORTPTR(img1 + j),
-                                       stride_img1,
-                                       CONVERT_TO_SHORTPTR(img2 + j),
-                                       stride_img2,
-                                       bd, shift);
-        ssim_total += v;
-        samples++;
-      }
-    }
-  } else {
-    // sample point start with each 4x4 location
-    for (i = 0; i <= height - 8;
-         i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
-      for (j = 0; j <= width - 8; j += 4) {
-        double v = high_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1,
+  // sample point start with each 4x4 location
+  for (i = 0; i <= height - 8;
+       i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
+    for (j = 0; j <= width - 8; j += 4) {
+      double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1,
                                  CONVERT_TO_SHORTPTR(img2 + j), stride_img2,
                                  bd);
-        ssim_total += v;
-        samples++;
-      }
+      ssim_total += v;
+      samples++;
     }
   }
   ssim_total /= samples;
@@ -255,25 +204,21 @@
 #if CONFIG_VP9_HIGHBITDEPTH
 double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
                             YV12_BUFFER_CONFIG *dest,
-                            double *weight, unsigned int bd,
-                            unsigned int shift) {
+                            double *weight, unsigned int bd) {
   double a, b, c;
   double ssimv;
 
   a = vp9_highbd_ssim2(source->y_buffer, dest->y_buffer,
                        source->y_stride, dest->y_stride,
-                       source->y_crop_width, source->y_crop_height,
-                       bd, shift);
+                       source->y_crop_width, source->y_crop_height, bd);
 
   b = vp9_highbd_ssim2(source->u_buffer, dest->u_buffer,
                        source->uv_stride, dest->uv_stride,
-                       source->uv_crop_width, source->uv_crop_height,
-                       bd, shift);
+                       source->uv_crop_width, source->uv_crop_height, bd);
 
   c = vp9_highbd_ssim2(source->v_buffer, dest->v_buffer,
                        source->uv_stride, dest->uv_stride,
-                       source->uv_crop_width, source->uv_crop_height,
-                       bd, shift);
+                       source->uv_crop_width, source->uv_crop_height, bd);
 
   ssimv = a * .8 + .1 * (b + c);
 
@@ -284,25 +229,21 @@
 
 double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
                              YV12_BUFFER_CONFIG *dest, double *ssim_y,
-                             double *ssim_u, double *ssim_v,
-                             unsigned int bd, unsigned int shift) {
+                             double *ssim_u, double *ssim_v, unsigned int bd) {
   double ssim_all = 0;
   double a, b, c;
 
   a = vp9_highbd_ssim2(source->y_buffer, dest->y_buffer,
                        source->y_stride, dest->y_stride,
-                       source->y_crop_width, source->y_crop_height,
-                       bd, shift);
+                       source->y_crop_width, source->y_crop_height, bd);
 
   b = vp9_highbd_ssim2(source->u_buffer, dest->u_buffer,
                        source->uv_stride, dest->uv_stride,
-                       source->uv_crop_width, source->uv_crop_height,
-                       bd, shift);
+                       source->uv_crop_width, source->uv_crop_height, bd);
 
   c = vp9_highbd_ssim2(source->v_buffer, dest->v_buffer,
                        source->uv_stride, dest->uv_stride,
-                       source->uv_crop_width, source->uv_crop_height,
-                       bd, shift);
+                       source->uv_crop_width, source->uv_crop_height, bd);
   *ssim_y = a;
   *ssim_u = b;
   *ssim_v = c;
--- a/vp9/encoder/vp9_ssim.h
+++ b/vp9/encoder/vp9_ssim.h
@@ -27,8 +27,7 @@
 double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
                             YV12_BUFFER_CONFIG *dest,
                             double *weight,
-                            unsigned int bd,
-                            unsigned int shift);
+                            unsigned int bd);
 
 double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
                              YV12_BUFFER_CONFIG *dest,
@@ -35,8 +34,7 @@
                              double *ssim_y,
                              double *ssim_u,
                              double *ssim_v,
-                             unsigned int bps,
-                             unsigned int shift);
+                             unsigned int bd);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 #ifdef __cplusplus
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -261,7 +261,7 @@
 }
 
 static INLINE void add_token(TOKENEXTRA **t, const vp9_prob *context_tree,
-                             int16_t extra, uint8_t token,
+                             int32_t extra, uint8_t token,
                              uint8_t skip_eob_node,
                              unsigned int *counts) {
   (*t)->token = token;
@@ -329,7 +329,7 @@
   scan = so->scan;
   nb = so->neighbors;
   c = 0;
-#if CONFIG_VP9_HIGH && CONFIG_HIGH_QUANT
+#if CONFIG_VP9_HIGHBITDEPTH
   if (cpi->common.profile >= PROFILE_2) {
     dct_value_tokens = (cpi->common.bit_depth == VPX_BITS_10 ?
                         vp9_dct_value_tokens_high10_ptr :
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -26,12 +26,20 @@
 
 typedef struct {
   int16_t token;
+#if CONFIG_VP9_HIGHBITDEPTH
+  int32_t extra;
+#else
   int16_t extra;
+#endif
 } TOKENVALUE;
 
 typedef struct {
   const vp9_prob *context_tree;
+#if CONFIG_VP9_HIGHBITDEPTH
+  int32_t extra;
+#else
   int16_t         extra;
+#endif
   uint8_t         token;
   uint8_t         skip_eob_node;
 } TOKENEXTRA;
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -113,8 +113,6 @@
 #else
   yv12->border  = (img->stride[VPX_PLANE_Y] - img->w) / 2;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-
-  yv12->border  = (img->stride[VPX_PLANE_Y] - img->w) / 2;
   return VPX_CODEC_OK;
 }
 
--- a/vpx/src/vpx_image.c
+++ b/vpx/src/vpx_image.c
@@ -15,17 +15,17 @@
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
 
-static vpx_image_t *img_alloc_helper(vpx_image_t   *img,
-                                     vpx_img_fmt_t  fmt,
-                                     unsigned int   d_w,
-                                     unsigned int   d_h,
-                                     unsigned int   buf_align,
-                                     unsigned int   stride_align,
+static vpx_image_t *img_alloc_helper(vpx_image_t *img,
+                                     vpx_img_fmt_t fmt,
+                                     unsigned int d_w,
+                                     unsigned int d_h,
+                                     unsigned int buf_align,
+                                     unsigned int stride_align,
                                      unsigned char *img_data) {
+  unsigned int h, w, s, xcs, ycs, bps;
+  unsigned int stride_in_bytes;
+  int align;
 
-  unsigned int  h, w, s, xcs, ycs, bps;
-  int           align;
-
   /* Treat align==0 like align==1 */
   if (!buf_align)
     buf_align = 1;
@@ -125,6 +125,7 @@
   h = (d_h + align) & ~align;
   s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8;
   s = (s + stride_align - 1) & ~(stride_align - 1);
+  stride_in_bytes = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;
 
   /* Allocate the new image */
   if (!img) {
@@ -163,8 +164,8 @@
   img->bps = bps;
 
   /* Calculate strides */
-  img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = s;
-  img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = s >> xcs;
+  img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = stride_in_bytes;
+  img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = stride_in_bytes >> xcs;
 
   /* Default viewport to entire image */
   if (!vpx_img_set_rect(img, 0, 0, d_w, d_h))
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1695,7 +1695,7 @@
 }
 
 
-static void show_psnr(struct stream_state  *stream) {
+static void show_psnr(struct stream_state  *stream, double peak) {
   int i;
   double ovpsnr;
 
@@ -1703,7 +1703,7 @@
     return;
 
   fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index);
-  ovpsnr = sse_to_psnr((double)stream->psnr_samples_total, 255.0,
+  ovpsnr = sse_to_psnr((double)stream->psnr_samples_total, peak,
                        (double)stream->psnr_sse_total);
   fprintf(stderr, " %.3f", ovpsnr);
 
@@ -1784,8 +1784,8 @@
     int h = src->h;
     int x, y;
     if (plane) {
-      w >>= src->x_chroma_shift;
-      h >>= src->y_chroma_shift;
+      w = (w + src->x_chroma_shift) >> src->x_chroma_shift;
+      h = (h + src->y_chroma_shift) >> src->y_chroma_shift;
     }
     for (y = 0; y < h; y++) {
       uint8_t *p_src = src->planes[plane] + y * src->stride[plane];
@@ -2272,24 +2272,29 @@
     if (stream_cnt > 1)
       fprintf(stderr, "\n");
 
-    if (!global.quiet)
-      FOREACH_STREAM(fprintf(
-                       stderr,
-                       "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s"
-                       " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1,
-                       global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,
-                       seen_frames ? (unsigned long)(stream->nbytes * 8 / seen_frames) : 0,
-                       seen_frames ? (int64_t)stream->nbytes * 8
-                       * (int64_t)global.framerate.num / global.framerate.den
-                       / seen_frames
-                       : 0,
-                       stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,
-                       stream->cx_time > 9999999 ? "ms" : "us",
-                       usec_to_fps(stream->cx_time, seen_frames));
-                    );
+    if (!global.quiet) {
+      FOREACH_STREAM(fprintf(stderr,
+          "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7"PRId64"b/f %7"PRId64"b/s"
+          " %7"PRId64" %s (%.2f fps)\033[K\n",
+          pass + 1,
+          global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,
+          seen_frames ? (int64_t)(stream->nbytes * 8 / seen_frames) : 0,
+          seen_frames ? (int64_t)stream->nbytes * 8 *
+              (int64_t)global.framerate.num / global.framerate.den /
+              seen_frames : 0,
+          stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,
+          stream->cx_time > 9999999 ? "ms" : "us",
+          usec_to_fps(stream->cx_time, seen_frames)));
+    }
 
-    if (global.show_psnr)
-      FOREACH_STREAM(show_psnr(stream));
+    if (global.show_psnr) {
+      if (global.codec->fourcc == VP9_FOURCC) {
+        FOREACH_STREAM(
+            show_psnr(stream, (1 << stream->config.cfg.g_input_bit_depth) - 1));
+      } else {
+        FOREACH_STREAM(show_psnr(stream, 255.0));
+      }
+    }
 
     FOREACH_STREAM(vpx_codec_destroy(&stream->encoder));