ref: a160d72522fd8572943966e96b65dd232fe048ea
parent: 10c7876a8e5a638c89eb06913e7c44a756e6d35d
author: Deb Mukherjee <[email protected]>
date: Tue Sep 30 17:56:33 EDT 2014
High-bitdepth bugfixes. Miscellaneous bug fixes for high-bitdepth functionality. With this patch, high-bitdepth profiles become mostly functional, except for an intermittent assert failure that is being tracked. Change-Id: I6a7fcbdcf1e5b09842e88535f8442d2e1230748c
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -469,7 +469,7 @@
uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};
const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};
- for (i = 0; i < MAX_MB_PLANE; ++i)
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
#if CONFIG_VP9_HIGHBITDEPTH
assert((src->flags & YV12_FLAG_HIGHBITDEPTH) ==
(dst->flags & YV12_FLAG_HIGHBITDEPTH));
@@ -488,6 +488,7 @@
src_strides[i], dst_strides[i],
src_heights[i], src_widths[i], ppl);
#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
}
void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1802,11 +1802,8 @@
# Structured Similarity (SSIM)
#
if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
- add_proto qw/void vp9_high_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/vp9_high_ssim_parms_8x8/;
-
- add_proto qw/void vp9_high_ssim_parms_8x8_shift/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr, unsigned int bd, unsigned int shift";
- specialize qw/vp9_high_ssim_parms_8x8_shift/;
+ add_proto qw/void vp9_highbd_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+ specialize qw/vp9_highbd_ssim_parms_8x8/;
}
# fdct functions
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1712,15 +1712,16 @@
- cpi->first_time_stamp_ever) / 10000000.000;
double total_encode_time = (cpi->time_receive_data +
cpi->time_compress_data) / 1000.000;
- double dr = (double)cpi->bytes * (double) 8 / (double)1000
- / time_encoded;
+ const double dr =
+ (double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;
+ const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
if (cpi->b_calculate_psnr) {
const double total_psnr =
- vpx_sse_to_psnr((double)cpi->total_samples, 255.0,
+ vpx_sse_to_psnr((double)cpi->total_samples, peak,
(double)cpi->total_sq_error);
const double totalp_psnr =
- vpx_sse_to_psnr((double)cpi->totalp_samples, 255.0,
+ vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
(double)cpi->totalp_sq_error);
const double total_ssim = 100 * pow(cpi->summed_quality /
cpi->summed_weights, 8.0);
@@ -1914,6 +1915,7 @@
static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
PSNR_STATS *psnr) {
+ static const double peak = 255.0;
const int widths[3] = {a->y_width, a->uv_width, a->uv_width };
const int heights[3] = {a->y_height, a->uv_height, a->uv_height};
const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer };
@@ -1933,7 +1935,7 @@
w, h);
psnr->sse[1 + i] = sse;
psnr->samples[1 + i] = samples;
- psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse);
+ psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
total_sse += sse;
total_samples += samples;
@@ -1941,7 +1943,7 @@
psnr->sse[0] = total_sse;
psnr->samples[0] = total_samples;
- psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, 255.0,
+ psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
(double)total_sse);
}
@@ -3699,7 +3701,7 @@
vp9_clear_system_state();
#if CONFIG_VP9_HIGHBITDEPTH
- calc_highbd_psnr(orig, recon, &psnr, cpi->mb.e_mbd.bd,
+ calc_highbd_psnr(orig, pp, &psnr, cpi->mb.e_mbd.bd,
cpi->oxcf.input_bit_depth);
#else
calc_psnr(orig, pp, &psnr2);
@@ -3714,11 +3716,9 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- frame_ssim2 = vp9_highbd_calc_ssim(
- orig, recon, &weight, xd->bd,
- xd->bd - cpi->oxcf.input_bit_depth);
+ frame_ssim2 = vp9_highbd_calc_ssim(orig, recon, &weight, xd->bd);
} else {
- frame_ssim2 = vp9_calc_ssim(orig, recon, 1, &weight);
+ frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
}
#else
frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
@@ -3730,8 +3730,7 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
frame_ssim2 = vp9_highbd_calc_ssim(
- orig, &cm->post_proc_buffer, &weight,
- xd->bd, xd->bd - cpi->oxcf.input_bit_depth);
+ orig, &cm->post_proc_buffer, &weight, xd->bd);
} else {
frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight);
}
@@ -3757,10 +3756,9 @@
if (cpi->b_calculate_ssimg) {
double y, u, v, frame_all;
#if CONFIG_VP9_HIGHBITDEPTH
- if (cm->use_high) {
+ if (cm->use_highbitdepth) {
frame_all = vp9_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y,
- &u, &v, xd->bd,
- xd->bd - cpi->oxcf.input_bit_depth);
+ &u, &v, xd->bd);
} else {
frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u,
&v);
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -246,11 +246,11 @@
} else {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+ vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> (xd->bd - 5),
&rate, &dist);
} else {
- vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+ vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> 3, &rate, &dist);
}
#else
--- a/vp9/encoder/vp9_ssim.c
+++ b/vp9/encoder/vp9_ssim.c
@@ -59,26 +59,6 @@
}
}
}
-
-void vp9_highbd_ssim_parms_8x8_shift_c(uint16_t *s, int sp, uint16_t *r, int rp,
- uint32_t *sum_s, uint32_t *sum_r,
- uint32_t *sum_sq_s, uint32_t *sum_sq_r,
- uint32_t *sum_sxr, unsigned int bd,
- unsigned int shift) {
- int i, j;
- const int max_val = (1 << bd) - 1;
- for (i = 0; i < 8; i++, s += sp, r += rp) {
- for (j = 0; j < 8; j++) {
- int sj = s[j];
- int rj = r[j];
- *sum_s += sj;
- *sum_r += rj;
- *sum_sq_s += sj * sj;
- *sum_sq_r += rj * rj;
- *sum_sxr += sj * rj;
- }
- }
-}
#endif // CONFIG_VP9_HIGHBITDEPTH
static const int64_t cc1 = 26634; // (64^2*(.01*255)^2
@@ -112,25 +92,10 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-static double high_ssim_8x8_shift(uint16_t *s, int sp, uint16_t *r, int rp,
- unsigned int bd, unsigned int shift) {
+static double highbd_ssim_8x8(uint16_t *s, int sp, uint16_t *r, int rp,
+ unsigned int bd) {
uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
const int oshift = bd - 8;
- vp9_highbd_ssim_parms_8x8_shift(s, sp, r, rp, &sum_s,
- &sum_r, &sum_sq_s, &sum_sq_r,
- &sum_sxr, bd, shift);
- return similarity(sum_s >> oshift,
- sum_r >> oshift,
- sum_sq_s >> (2 * oshift),
- sum_sq_r >> (2 * oshift),
- sum_sxr >> (2 * oshift),
- 64);
-}
-
-static double high_ssim_8x8(uint16_t *s, int sp, uint16_t *r, int rp,
- unsigned int bd) {
- uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
- const int oshift = bd - 8;
vp9_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
&sum_sxr);
return similarity(sum_s >> oshift,
@@ -167,36 +132,20 @@
#if CONFIG_VP9_HIGHBITDEPTH
double vp9_highbd_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
int stride_img2, int width, int height,
- unsigned int bd, unsigned int shift) {
+ unsigned int bd) {
int i, j;
int samples = 0;
double ssim_total = 0;
- if (shift) {
- // sample point start with each 4x4 location
- for (i = 0; i <= height - 8;
- i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
- for (j = 0; j <= width - 8; j += 4) {
- double v = high_ssim_8x8_shift(CONVERT_TO_SHORTPTR(img1 + j),
- stride_img1,
- CONVERT_TO_SHORTPTR(img2 + j),
- stride_img2,
- bd, shift);
- ssim_total += v;
- samples++;
- }
- }
- } else {
- // sample point start with each 4x4 location
- for (i = 0; i <= height - 8;
- i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
- for (j = 0; j <= width - 8; j += 4) {
- double v = high_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1,
+ // sample point start with each 4x4 location
+ for (i = 0; i <= height - 8;
+ i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
+ for (j = 0; j <= width - 8; j += 4) {
+ double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1,
CONVERT_TO_SHORTPTR(img2 + j), stride_img2,
bd);
- ssim_total += v;
- samples++;
- }
+ ssim_total += v;
+ samples++;
}
}
ssim_total /= samples;
@@ -255,25 +204,21 @@
#if CONFIG_VP9_HIGHBITDEPTH
double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *dest,
- double *weight, unsigned int bd,
- unsigned int shift) {
+ double *weight, unsigned int bd) {
double a, b, c;
double ssimv;
a = vp9_highbd_ssim2(source->y_buffer, dest->y_buffer,
source->y_stride, dest->y_stride,
- source->y_crop_width, source->y_crop_height,
- bd, shift);
+ source->y_crop_width, source->y_crop_height, bd);
b = vp9_highbd_ssim2(source->u_buffer, dest->u_buffer,
source->uv_stride, dest->uv_stride,
- source->uv_crop_width, source->uv_crop_height,
- bd, shift);
+ source->uv_crop_width, source->uv_crop_height, bd);
c = vp9_highbd_ssim2(source->v_buffer, dest->v_buffer,
source->uv_stride, dest->uv_stride,
- source->uv_crop_width, source->uv_crop_height,
- bd, shift);
+ source->uv_crop_width, source->uv_crop_height, bd);
ssimv = a * .8 + .1 * (b + c);
@@ -284,25 +229,21 @@
double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *dest, double *ssim_y,
- double *ssim_u, double *ssim_v,
- unsigned int bd, unsigned int shift) {
+ double *ssim_u, double *ssim_v, unsigned int bd) {
double ssim_all = 0;
double a, b, c;
a = vp9_highbd_ssim2(source->y_buffer, dest->y_buffer,
source->y_stride, dest->y_stride,
- source->y_crop_width, source->y_crop_height,
- bd, shift);
+ source->y_crop_width, source->y_crop_height, bd);
b = vp9_highbd_ssim2(source->u_buffer, dest->u_buffer,
source->uv_stride, dest->uv_stride,
- source->uv_crop_width, source->uv_crop_height,
- bd, shift);
+ source->uv_crop_width, source->uv_crop_height, bd);
c = vp9_highbd_ssim2(source->v_buffer, dest->v_buffer,
source->uv_stride, dest->uv_stride,
- source->uv_crop_width, source->uv_crop_height,
- bd, shift);
+ source->uv_crop_width, source->uv_crop_height, bd);
*ssim_y = a;
*ssim_u = b;
*ssim_v = c;
--- a/vp9/encoder/vp9_ssim.h
+++ b/vp9/encoder/vp9_ssim.h
@@ -27,8 +27,7 @@
double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *dest,
double *weight,
- unsigned int bd,
- unsigned int shift);
+ unsigned int bd);
double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *dest,
@@ -35,8 +34,7 @@
double *ssim_y,
double *ssim_u,
double *ssim_v,
- unsigned int bps,
- unsigned int shift);
+ unsigned int bd);
#endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -261,7 +261,7 @@
}
static INLINE void add_token(TOKENEXTRA **t, const vp9_prob *context_tree,
- int16_t extra, uint8_t token,
+ int32_t extra, uint8_t token,
uint8_t skip_eob_node,
unsigned int *counts) {
(*t)->token = token;
@@ -329,7 +329,7 @@
scan = so->scan;
nb = so->neighbors;
c = 0;
-#if CONFIG_VP9_HIGH && CONFIG_HIGH_QUANT
+#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->common.profile >= PROFILE_2) {
dct_value_tokens = (cpi->common.bit_depth == VPX_BITS_10 ?
vp9_dct_value_tokens_high10_ptr :
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -26,12 +26,20 @@
typedef struct {
int16_t token;
+#if CONFIG_VP9_HIGHBITDEPTH
+ int32_t extra;
+#else
int16_t extra;
+#endif
} TOKENVALUE;
typedef struct {
const vp9_prob *context_tree;
+#if CONFIG_VP9_HIGHBITDEPTH
+ int32_t extra;
+#else
int16_t extra;
+#endif
uint8_t token;
uint8_t skip_eob_node;
} TOKENEXTRA;
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -113,8 +113,6 @@
#else
yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
#endif // CONFIG_VP9_HIGHBITDEPTH
-
- yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
return VPX_CODEC_OK;
}
--- a/vpx/src/vpx_image.c
+++ b/vpx/src/vpx_image.c
@@ -15,17 +15,17 @@
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
-static vpx_image_t *img_alloc_helper(vpx_image_t *img,
- vpx_img_fmt_t fmt,
- unsigned int d_w,
- unsigned int d_h,
- unsigned int buf_align,
- unsigned int stride_align,
+static vpx_image_t *img_alloc_helper(vpx_image_t *img,
+ vpx_img_fmt_t fmt,
+ unsigned int d_w,
+ unsigned int d_h,
+ unsigned int buf_align,
+ unsigned int stride_align,
unsigned char *img_data) {
+ unsigned int h, w, s, xcs, ycs, bps;
+ unsigned int stride_in_bytes;
+ int align;
- unsigned int h, w, s, xcs, ycs, bps;
- int align;
-
/* Treat align==0 like align==1 */
if (!buf_align)
buf_align = 1;
@@ -125,6 +125,7 @@
h = (d_h + align) & ~align;
s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8;
s = (s + stride_align - 1) & ~(stride_align - 1);
+ stride_in_bytes = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;
/* Allocate the new image */
if (!img) {
@@ -163,8 +164,8 @@
img->bps = bps;
/* Calculate strides */
- img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = s;
- img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = s >> xcs;
+ img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = stride_in_bytes;
+ img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = stride_in_bytes >> xcs;
/* Default viewport to entire image */
if (!vpx_img_set_rect(img, 0, 0, d_w, d_h))
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1695,7 +1695,7 @@
}
-static void show_psnr(struct stream_state *stream) {
+static void show_psnr(struct stream_state *stream, double peak) {
int i;
double ovpsnr;
@@ -1703,7 +1703,7 @@
return;
fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index);
- ovpsnr = sse_to_psnr((double)stream->psnr_samples_total, 255.0,
+ ovpsnr = sse_to_psnr((double)stream->psnr_samples_total, peak,
(double)stream->psnr_sse_total);
fprintf(stderr, " %.3f", ovpsnr);
@@ -1784,8 +1784,8 @@
int h = src->h;
int x, y;
if (plane) {
- w >>= src->x_chroma_shift;
- h >>= src->y_chroma_shift;
+ w = (w + src->x_chroma_shift) >> src->x_chroma_shift;
+ h = (h + src->y_chroma_shift) >> src->y_chroma_shift;
}
for (y = 0; y < h; y++) {
uint8_t *p_src = src->planes[plane] + y * src->stride[plane];
@@ -2272,24 +2272,29 @@
if (stream_cnt > 1)
fprintf(stderr, "\n");
- if (!global.quiet)
- FOREACH_STREAM(fprintf(
- stderr,
- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s"
- " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1,
- global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,
- seen_frames ? (unsigned long)(stream->nbytes * 8 / seen_frames) : 0,
- seen_frames ? (int64_t)stream->nbytes * 8
- * (int64_t)global.framerate.num / global.framerate.den
- / seen_frames
- : 0,
- stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,
- stream->cx_time > 9999999 ? "ms" : "us",
- usec_to_fps(stream->cx_time, seen_frames));
- );
+ if (!global.quiet) {
+ FOREACH_STREAM(fprintf(stderr,
+ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7"PRId64"b/f %7"PRId64"b/s"
+ " %7"PRId64" %s (%.2f fps)\033[K\n",
+ pass + 1,
+ global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,
+ seen_frames ? (int64_t)(stream->nbytes * 8 / seen_frames) : 0,
+ seen_frames ? (int64_t)stream->nbytes * 8 *
+ (int64_t)global.framerate.num / global.framerate.den /
+ seen_frames : 0,
+ stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,
+ stream->cx_time > 9999999 ? "ms" : "us",
+ usec_to_fps(stream->cx_time, seen_frames)));
+ }
- if (global.show_psnr)
- FOREACH_STREAM(show_psnr(stream));
+ if (global.show_psnr) {
+ if (global.codec->fourcc == VP9_FOURCC) {
+ FOREACH_STREAM(
+ show_psnr(stream, (1 << stream->config.cfg.g_input_bit_depth) - 1));
+ } else {
+ FOREACH_STREAM(show_psnr(stream, 255.0));
+ }
+ }
FOREACH_STREAM(vpx_codec_destroy(&stream->encoder));