shithub: libvpx

--- a/vp9/common/vp9_postproc.c

+++ b/vp9/common/vp9_postproc.c

@@ -469,7 +469,7 @@

   uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};

   const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};

-  for (i = 0; i < MAX_MB_PLANE; ++i)

+  for (i = 0; i < MAX_MB_PLANE; ++i) {

 #if CONFIG_VP9_HIGHBITDEPTH

     assert((src->flags & YV12_FLAG_HIGHBITDEPTH) ==

            (dst->flags & YV12_FLAG_HIGHBITDEPTH));

@@ -488,6 +488,7 @@

                                   src_strides[i], dst_strides[i],

                                   src_heights[i], src_widths[i], ppl);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

+  }

 void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,

--- a/vp9/common/vp9_rtcd_defs.pl

+++ b/vp9/common/vp9_rtcd_defs.pl

@@ -1802,11 +1802,8 @@

   # Structured Similarity (SSIM)

   if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {

-    add_proto qw/void vp9_high_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";

-    specialize qw/vp9_high_ssim_parms_8x8/;

-    add_proto qw/void vp9_high_ssim_parms_8x8_shift/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr, unsigned int bd, unsigned int shift";

-    specialize qw/vp9_high_ssim_parms_8x8_shift/;

+    add_proto qw/void vp9_highbd_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";

+    specialize qw/vp9_highbd_ssim_parms_8x8/;

   # fdct functions

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -1712,15 +1712,16 @@

                              - cpi->first_time_stamp_ever) / 10000000.000;

       double total_encode_time = (cpi->time_receive_data +

                                   cpi->time_compress_data)   / 1000.000;

-      double dr = (double)cpi->bytes * (double) 8 / (double)1000

-                  / time_encoded;

+      const double dr =

+          (double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;

+      const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);

       if (cpi->b_calculate_psnr) {

         const double total_psnr =

-            vpx_sse_to_psnr((double)cpi->total_samples, 255.0,

+            vpx_sse_to_psnr((double)cpi->total_samples, peak,

                             (double)cpi->total_sq_error);

         const double totalp_psnr =

-            vpx_sse_to_psnr((double)cpi->totalp_samples, 255.0,

+            vpx_sse_to_psnr((double)cpi->totalp_samples, peak,

                             (double)cpi->totalp_sq_error);

         const double total_ssim = 100 * pow(cpi->summed_quality /

                                                 cpi->summed_weights, 8.0);

@@ -1914,6 +1915,7 @@

 static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,

                       PSNR_STATS *psnr) {

+  static const double peak = 255.0;

   const int widths[3]        = {a->y_width,  a->uv_width,  a->uv_width };

   const int heights[3]       = {a->y_height, a->uv_height, a->uv_height};

   const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer,  a->v_buffer };

@@ -1933,7 +1935,7 @@

                                  w, h);

     psnr->sse[1 + i] = sse;

     psnr->samples[1 + i] = samples;

-    psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse);

+    psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);

     total_sse += sse;

     total_samples += samples;

@@ -1941,7 +1943,7 @@

   psnr->sse[0] = total_sse;

   psnr->samples[0] = total_samples;

-  psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, 255.0,

+  psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,

                                   (double)total_sse);

@@ -3699,7 +3701,7 @@

           vp9_clear_system_state();

 #if CONFIG_VP9_HIGHBITDEPTH

-          calc_highbd_psnr(orig, recon, &psnr, cpi->mb.e_mbd.bd,

+          calc_highbd_psnr(orig, pp, &psnr, cpi->mb.e_mbd.bd,

                            cpi->oxcf.input_bit_depth);

 #else

           calc_psnr(orig, pp, &psnr2);

@@ -3714,11 +3716,9 @@

 #if CONFIG_VP9_HIGHBITDEPTH

           if (cm->use_highbitdepth) {

-            frame_ssim2 = vp9_highbd_calc_ssim(

-                orig, recon, &weight, xd->bd,

-                xd->bd - cpi->oxcf.input_bit_depth);

+            frame_ssim2 = vp9_highbd_calc_ssim(orig, recon, &weight, xd->bd);

           } else {

-            frame_ssim2 = vp9_calc_ssim(orig, recon, 1, &weight);

+            frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);

 #else

           frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);

@@ -3730,8 +3730,7 @@

 #if CONFIG_VP9_HIGHBITDEPTH

           if (cm->use_highbitdepth) {

             frame_ssim2 = vp9_highbd_calc_ssim(

-                orig, &cm->post_proc_buffer, &weight,

-                xd->bd, xd->bd - cpi->oxcf.input_bit_depth);

+                orig, &cm->post_proc_buffer, &weight, xd->bd);

           } else {

             frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight);

@@ -3757,10 +3756,9 @@

       if (cpi->b_calculate_ssimg) {

         double y, u, v, frame_all;

 #if CONFIG_VP9_HIGHBITDEPTH

-        if (cm->use_high) {

+        if (cm->use_highbitdepth) {

           frame_all = vp9_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y,

-                                            &u, &v, xd->bd,

-                                            xd->bd - cpi->oxcf.input_bit_depth);

+                                            &u, &v, xd->bd);

         } else {

           frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u,

                                      &v);

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -246,11 +246,11 @@

     } else {

 #if CONFIG_VP9_HIGHBITDEPTH

       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

-        vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],

+        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],

                                      pd->dequant[1] >> (xd->bd - 5),

                                      &rate, &dist);

       } else {

-        vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],

+        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],

                                      pd->dequant[1] >> 3, &rate, &dist);

 #else

--- a/vp9/encoder/vp9_ssim.c

+++ b/vp9/encoder/vp9_ssim.c

@@ -59,26 +59,6 @@

-void vp9_highbd_ssim_parms_8x8_shift_c(uint16_t *s, int sp, uint16_t *r, int rp,

-                                       uint32_t *sum_s, uint32_t *sum_r,

-                                       uint32_t *sum_sq_s, uint32_t *sum_sq_r,

-                                       uint32_t *sum_sxr, unsigned int bd,

-                                       unsigned int shift) {

-  int i, j;

-  const int max_val = (1 << bd) - 1;

-  for (i = 0; i < 8; i++, s += sp, r += rp) {

-    for (j = 0; j < 8; j++) {

-      int sj = s[j];

-      int rj = r[j];

-      *sum_s += sj;

-      *sum_r += rj;

-      *sum_sq_s += sj * sj;

-      *sum_sq_r += rj * rj;

-      *sum_sxr += sj * rj;

-    }

-  }

-}

 #endif  // CONFIG_VP9_HIGHBITDEPTH

 static const int64_t cc1 =  26634;  // (64^2*(.01*255)^2

@@ -112,25 +92,10 @@

 #if CONFIG_VP9_HIGHBITDEPTH

-static double high_ssim_8x8_shift(uint16_t *s, int sp, uint16_t *r, int rp,

-                                  unsigned int bd, unsigned int shift) {

+static double highbd_ssim_8x8(uint16_t *s, int sp, uint16_t *r, int rp,

+                              unsigned int bd) {

   uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;

   const int oshift = bd - 8;

-  vp9_highbd_ssim_parms_8x8_shift(s, sp, r, rp, &sum_s,

-                                  &sum_r, &sum_sq_s, &sum_sq_r,

-                                  &sum_sxr, bd, shift);

-  return similarity(sum_s >> oshift,

-                    sum_r >> oshift,

-                    sum_sq_s >> (2 * oshift),

-                    sum_sq_r >> (2 * oshift),

-                    sum_sxr >> (2 * oshift),

-                    64);

-}

-static double high_ssim_8x8(uint16_t *s, int sp, uint16_t *r, int rp,

-                            unsigned int bd) {

-  uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;

-  const int oshift = bd - 8;

   vp9_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,

                             &sum_sxr);

   return similarity(sum_s >> oshift,

@@ -167,36 +132,20 @@

 #if CONFIG_VP9_HIGHBITDEPTH

 double vp9_highbd_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,

                         int stride_img2, int width, int height,

-                        unsigned int bd, unsigned int shift) {

+                        unsigned int bd) {

   int i, j;

   int samples = 0;

   double ssim_total = 0;

-  if (shift) {

-    // sample point start with each 4x4 location

-    for (i = 0; i <= height - 8;

-         i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {

-      for (j = 0; j <= width - 8; j += 4) {

-        double v = high_ssim_8x8_shift(CONVERT_TO_SHORTPTR(img1 + j),

-                                       stride_img1,

-                                       CONVERT_TO_SHORTPTR(img2 + j),

-                                       stride_img2,

-                                       bd, shift);

-        ssim_total += v;

-        samples++;

-      }

-    }

-  } else {

-    // sample point start with each 4x4 location

-    for (i = 0; i <= height - 8;

-         i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {

-      for (j = 0; j <= width - 8; j += 4) {

-        double v = high_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1,

+  // sample point start with each 4x4 location

+  for (i = 0; i <= height - 8;

+       i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {

+    for (j = 0; j <= width - 8; j += 4) {

+      double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1,

                                  CONVERT_TO_SHORTPTR(img2 + j), stride_img2,

                                  bd);

-        ssim_total += v;

-        samples++;

-      }

+      ssim_total += v;

+      samples++;

   ssim_total /= samples;

@@ -255,25 +204,21 @@

 #if CONFIG_VP9_HIGHBITDEPTH

 double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,

                             YV12_BUFFER_CONFIG *dest,

-                            double *weight, unsigned int bd,

-                            unsigned int shift) {

+                            double *weight, unsigned int bd) {

   double a, b, c;

   double ssimv;

   a = vp9_highbd_ssim2(source->y_buffer, dest->y_buffer,

                        source->y_stride, dest->y_stride,

-                       source->y_crop_width, source->y_crop_height,

-                       bd, shift);

+                       source->y_crop_width, source->y_crop_height, bd);

   b = vp9_highbd_ssim2(source->u_buffer, dest->u_buffer,

                        source->uv_stride, dest->uv_stride,

-                       source->uv_crop_width, source->uv_crop_height,

-                       bd, shift);

+                       source->uv_crop_width, source->uv_crop_height, bd);

   c = vp9_highbd_ssim2(source->v_buffer, dest->v_buffer,

                        source->uv_stride, dest->uv_stride,

-                       source->uv_crop_width, source->uv_crop_height,

-                       bd, shift);

+                       source->uv_crop_width, source->uv_crop_height, bd);

   ssimv = a * .8 + .1 * (b + c);

@@ -284,25 +229,21 @@

 double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,

                              YV12_BUFFER_CONFIG *dest, double *ssim_y,

-                             double *ssim_u, double *ssim_v,

-                             unsigned int bd, unsigned int shift) {

+                             double *ssim_u, double *ssim_v, unsigned int bd) {

   double ssim_all = 0;

   double a, b, c;

   a = vp9_highbd_ssim2(source->y_buffer, dest->y_buffer,

                        source->y_stride, dest->y_stride,

-                       source->y_crop_width, source->y_crop_height,

-                       bd, shift);

+                       source->y_crop_width, source->y_crop_height, bd);

   b = vp9_highbd_ssim2(source->u_buffer, dest->u_buffer,

                        source->uv_stride, dest->uv_stride,

-                       source->uv_crop_width, source->uv_crop_height,

-                       bd, shift);

+                       source->uv_crop_width, source->uv_crop_height, bd);

   c = vp9_highbd_ssim2(source->v_buffer, dest->v_buffer,

                        source->uv_stride, dest->uv_stride,

-                       source->uv_crop_width, source->uv_crop_height,

-                       bd, shift);

+                       source->uv_crop_width, source->uv_crop_height, bd);

   *ssim_y = a;

   *ssim_u = b;

   *ssim_v = c;

--- a/vp9/encoder/vp9_ssim.h

+++ b/vp9/encoder/vp9_ssim.h

@@ -27,8 +27,7 @@

 double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,

                             YV12_BUFFER_CONFIG *dest,

                             double *weight,

-                            unsigned int bd,

-                            unsigned int shift);

+                            unsigned int bd);

 double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,

                              YV12_BUFFER_CONFIG *dest,

@@ -35,8 +34,7 @@

                              double *ssim_y,

                              double *ssim_u,

                              double *ssim_v,

-                             unsigned int bps,

-                             unsigned int shift);

+                             unsigned int bd);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #ifdef __cplusplus

--- a/vp9/encoder/vp9_tokenize.c

+++ b/vp9/encoder/vp9_tokenize.c

@@ -261,7 +261,7 @@

 static INLINE void add_token(TOKENEXTRA **t, const vp9_prob *context_tree,

-                             int16_t extra, uint8_t token,

+                             int32_t extra, uint8_t token,

                              uint8_t skip_eob_node,

                              unsigned int *counts) {

   (*t)->token = token;

@@ -329,7 +329,7 @@

   scan = so->scan;

   nb = so->neighbors;

   c = 0;

-#if CONFIG_VP9_HIGH && CONFIG_HIGH_QUANT

+#if CONFIG_VP9_HIGHBITDEPTH

   if (cpi->common.profile >= PROFILE_2) {

     dct_value_tokens = (cpi->common.bit_depth == VPX_BITS_10 ?

                         vp9_dct_value_tokens_high10_ptr :

--- a/vp9/encoder/vp9_tokenize.h

+++ b/vp9/encoder/vp9_tokenize.h

@@ -26,12 +26,20 @@

 typedef struct {

   int16_t token;

+#if CONFIG_VP9_HIGHBITDEPTH

+  int32_t extra;

+#else

   int16_t extra;

+#endif

 } TOKENVALUE;

 typedef struct {

   const vp9_prob *context_tree;

+#if CONFIG_VP9_HIGHBITDEPTH

+  int32_t extra;

+#else

   int16_t         extra;

+#endif

   uint8_t         token;

   uint8_t         skip_eob_node;

 } TOKENEXTRA;

--- a/vp9/vp9_iface_common.h

+++ b/vp9/vp9_iface_common.h

@@ -113,8 +113,6 @@

 #else

   yv12->border  = (img->stride[VPX_PLANE_Y] - img->w) / 2;

 #endif  // CONFIG_VP9_HIGHBITDEPTH

-  yv12->border  = (img->stride[VPX_PLANE_Y] - img->w) / 2;

   return VPX_CODEC_OK;

--- a/vpx/src/vpx_image.c

+++ b/vpx/src/vpx_image.c

@@ -15,17 +15,17 @@

 #include "vpx/vpx_integer.h"

 #include "vpx_mem/vpx_mem.h"

-static vpx_image_t *img_alloc_helper(vpx_image_t   *img,

-                                     vpx_img_fmt_t  fmt,

-                                     unsigned int   d_w,

-                                     unsigned int   d_h,

-                                     unsigned int   buf_align,

-                                     unsigned int   stride_align,

+static vpx_image_t *img_alloc_helper(vpx_image_t *img,

+                                     vpx_img_fmt_t fmt,

+                                     unsigned int d_w,

+                                     unsigned int d_h,

+                                     unsigned int buf_align,

+                                     unsigned int stride_align,

                                      unsigned char *img_data) {

+  unsigned int h, w, s, xcs, ycs, bps;

+  unsigned int stride_in_bytes;

+  int align;

-  unsigned int  h, w, s, xcs, ycs, bps;

-  int           align;

   /* Treat align==0 like align==1 */

   if (!buf_align)

     buf_align = 1;

@@ -125,6 +125,7 @@

   h = (d_h + align) & ~align;

   s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8;

   s = (s + stride_align - 1) & ~(stride_align - 1);

+  stride_in_bytes = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;

   /* Allocate the new image */

   if (!img) {

@@ -163,8 +164,8 @@

   img->bps = bps;

   /* Calculate strides */

-  img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = s;

-  img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = s >> xcs;

+  img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = stride_in_bytes;

+  img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = stride_in_bytes >> xcs;

   /* Default viewport to entire image */

   if (!vpx_img_set_rect(img, 0, 0, d_w, d_h))

--- a/vpxenc.c

+++ b/vpxenc.c

@@ -1695,7 +1695,7 @@

-static void show_psnr(struct stream_state  *stream) {

+static void show_psnr(struct stream_state  *stream, double peak) {

   int i;

   double ovpsnr;

@@ -1703,7 +1703,7 @@

     return;

   fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index);

-  ovpsnr = sse_to_psnr((double)stream->psnr_samples_total, 255.0,

+  ovpsnr = sse_to_psnr((double)stream->psnr_samples_total, peak,

                        (double)stream->psnr_sse_total);

   fprintf(stderr, " %.3f", ovpsnr);

@@ -1784,8 +1784,8 @@

     int h = src->h;

     int x, y;

     if (plane) {

-      w >>= src->x_chroma_shift;

-      h >>= src->y_chroma_shift;

+      w = (w + src->x_chroma_shift) >> src->x_chroma_shift;

+      h = (h + src->y_chroma_shift) >> src->y_chroma_shift;

     for (y = 0; y < h; y++) {

       uint8_t *p_src = src->planes[plane] + y * src->stride[plane];

@@ -2272,24 +2272,29 @@

     if (stream_cnt > 1)

       fprintf(stderr, "\n");

-    if (!global.quiet)

-      FOREACH_STREAM(fprintf(

-                       stderr,

-                       "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s"

-                       " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1,

-                       global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,

-                       seen_frames ? (unsigned long)(stream->nbytes * 8 / seen_frames) : 0,

-                       seen_frames ? (int64_t)stream->nbytes * 8

-                       * (int64_t)global.framerate.num / global.framerate.den

-                       / seen_frames

-                       : 0,

-                       stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,

-                       stream->cx_time > 9999999 ? "ms" : "us",

-                       usec_to_fps(stream->cx_time, seen_frames));

-                    );

+    if (!global.quiet) {

+      FOREACH_STREAM(fprintf(stderr,

+          "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7"PRId64"b/f %7"PRId64"b/s"

+          " %7"PRId64" %s (%.2f fps)\033[K\n",

+          pass + 1,

+          global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,

+          seen_frames ? (int64_t)(stream->nbytes * 8 / seen_frames) : 0,

+          seen_frames ? (int64_t)stream->nbytes * 8 *

+              (int64_t)global.framerate.num / global.framerate.den /

+              seen_frames : 0,

+          stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,

+          stream->cx_time > 9999999 ? "ms" : "us",

+          usec_to_fps(stream->cx_time, seen_frames)));

+    }

-    if (global.show_psnr)

-      FOREACH_STREAM(show_psnr(stream));

+    if (global.show_psnr) {

+      if (global.codec->fourcc == VP9_FOURCC) {

+        FOREACH_STREAM(

+            show_psnr(stream, (1 << stream->config.cfg.g_input_bit_depth) - 1));

+      } else {

+        FOREACH_STREAM(show_psnr(stream, 255.0));

+      }

+    }

     FOREACH_STREAM(vpx_codec_destroy(&stream->encoder));