ref: 7e731b6ceaf7ed9c8c07b0040983ec6fedfe42af
parent: ae0e383d3593cf9f9b17d6f499ec01f803faa00e
author: Alex Converse <[email protected]>
date: Wed Dec 18 06:57:40 EST 2013
Fix arnr for 4:4:4.

The first pass and the ARNR temporal filter hard-coded 8x8 chroma
blocks per 16x16 macroblock, i.e. 4:2:0. Derive the chroma block size
from the actual subsampling instead, and size the filter's scratch
buffers for the 4:4:4 worst case. 4:2:2 is left as a TODO.

Change-Id: I80a0cea96c65c0cfb530a71053616dba6edeb896
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -502,6 +502,7 @@
YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
const int recon_y_stride = lst_yv12->y_stride;
const int recon_uv_stride = lst_yv12->uv_stride;
+ const int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height);
int64_t intra_error = 0;
int64_t coded_error = 0;
int64_t sr_coded_error = 0;
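The uv_mb_height expression encodes the chroma block height per 16x16
macroblock from plane heights alone, presumably because
YV12_BUFFER_CONFIG carries heights rather than subsampling factors:
when the chroma plane is shorter than the luma plane, the comparison
yields 1 and the shift halves 16. A minimal standalone sketch
(uv_mb_height_for is a hypothetical name, not patch code):

    #include <assert.h>

    static int uv_mb_height_for(int y_height, int uv_height) {
      /* Comparison is 1 when chroma is vertically subsampled, else 0. */
      return 16 >> (y_height > uv_height);
    }

    int main(void) {
      assert(uv_mb_height_for(720, 360) == 8);   /* 4:2:0 */
      assert(uv_mb_height_for(720, 720) == 16);  /* 4:4:4 */
      return 0;
    }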
@@ -565,7 +566,7 @@
// reset above block coeffs
xd->up_available = (mb_row != 0);
recon_yoffset = (mb_row * recon_y_stride * 16);
- recon_uvoffset = (mb_row * recon_uv_stride * 8);
+ recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height);
// Set up limit values for motion vectors to prevent them extending
// outside the UMV borders
@@ -780,17 +781,19 @@
// adjust to the next column of macroblocks
x->plane[0].src.buf += 16;
- x->plane[1].src.buf += 8;
- x->plane[2].src.buf += 8;
+ x->plane[1].src.buf += uv_mb_height;
+ x->plane[2].src.buf += uv_mb_height;
recon_yoffset += 16;
- recon_uvoffset += 8;
+ recon_uvoffset += uv_mb_height;
}
// adjust to the next row of mbs
x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
- x->plane[1].src.buf += 8 * x->plane[1].src.stride - 8 * cm->mb_cols;
- x->plane[2].src.buf += 8 * x->plane[1].src.stride - 8 * cm->mb_cols;
+ x->plane[1].src.buf += uv_mb_height * x->plane[1].src.stride -
+ uv_mb_height * cm->mb_cols;
+ x->plane[2].src.buf += uv_mb_height * x->plane[1].src.stride -
+ uv_mb_height * cm->mb_cols;
vp9_clear_system_state(); // __asm emms;
}
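The per-column and end-of-row adjustments above are the incremental
form of a direct offset computation. A sketch under the same
conventions (uv_offset_of_mb is a hypothetical helper, not patch code):

    /* Offset of macroblock (mb_row, mb_col) in a chroma plane whose
     * per-MB block size is uv_mb_height. */
    static int uv_offset_of_mb(int mb_row, int mb_col,
                               int uv_stride, int uv_mb_height) {
      return mb_row * uv_mb_height * uv_stride + mb_col * uv_mb_height;
    }

Stepping by uv_mb_height for each of cm->mb_cols columns and then
adding uv_mb_height * stride - uv_mb_height * cm->mb_cols lands exactly
on uv_offset_of_mb(mb_row + 1, 0, ...), which is why the hard-coded 8s
were only correct when the chroma block was 8x8.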
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -36,6 +36,7 @@
uint8_t *u_mb_ptr,
uint8_t *v_mb_ptr,
int stride,
+ int uv_block_size,
int mv_row,
int mv_col,
uint8_t *pred,
@@ -42,6 +43,15 @@
struct scale_factors *scale) {
const int which_mv = 0;
MV mv = { mv_row, mv_col };
+ enum mv_precision mv_precision_uv;
+ int uv_stride;
+ if (uv_block_size == 8) {
+ uv_stride = (stride + 1) >> 1;
+ mv_precision_uv = MV_PRECISION_Q4;
+ } else {
+ uv_stride = stride;
+ mv_precision_uv = MV_PRECISION_Q3;
+ }
vp9_build_inter_predictor(y_mb_ptr, stride,
&pred[0], 16,
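The precision switch above deserves a note: motion vectors are
estimated on luma in 1/8-pel (Q3) units. When chroma is at half the
luma resolution, one luma 1/8-pel step equals one chroma 1/16-pel step,
so the same integer MV must be interpreted as Q4; at full chroma
resolution (4:4:4) it applies unchanged. A standalone sketch of that
rule (uv_mv_precision is a hypothetical name):

    enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 };

    static enum mv_precision uv_mv_precision(int chroma_subsampled) {
      /* Halving the plane resolution adds one fractional MV bit. */
      return chroma_subsampled ? MV_PRECISION_Q4 : MV_PRECISION_Q3;
    }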
@@ -51,23 +61,22 @@
which_mv,
&xd->subpix, MV_PRECISION_Q3);
- stride = (stride + 1) >> 1;
- vp9_build_inter_predictor(u_mb_ptr, stride,
- &pred[256], 8,
+ vp9_build_inter_predictor(u_mb_ptr, uv_stride,
+ &pred[256], uv_block_size,
&mv,
scale,
- 8, 8,
+ uv_block_size, uv_block_size,
which_mv,
- &xd->subpix, MV_PRECISION_Q4);
+ &xd->subpix, mv_precision_uv);
- vp9_build_inter_predictor(v_mb_ptr, stride,
- &pred[320], 8,
+ vp9_build_inter_predictor(v_mb_ptr, uv_stride,
+ &pred[512], uv_block_size,
&mv,
scale,
- 8, 8,
+ uv_block_size, uv_block_size,
which_mv,
- &xd->subpix, MV_PRECISION_Q4);
+ &xd->subpix, mv_precision_uv);
}
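With the chroma block size variable, the prediction scratch buffer is
laid out with fixed worst-case plane offsets rather than packed 4:2:0
offsets, which is why the V plane moved from pred[320] to pred[512]. A
sketch of the layout (constant names are illustrative):

    /* 16*16*3 scratch buffer: each plane owns a full 256-entry region.
     * For 4:2:0, U and V use only the first 8*8 = 64 entries of their
     * regions; the tail is unused padding. */
    enum { PRED_Y = 0, PRED_U = 16 * 16, PRED_V = 2 * 16 * 16 };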
void vp9_temporal_filter_apply_c(uint8_t *frame1,
@@ -197,17 +206,21 @@
int mb_rows = cpi->common.mb_rows;
int mb_y_offset = 0;
int mb_uv_offset = 0;
- DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16 * 16 + 8 * 8 + 8 * 8);
- DECLARE_ALIGNED_ARRAY(16, uint16_t, count, 16 * 16 + 8 * 8 + 8 * 8);
+ DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16 * 16 * 3);
+ DECLARE_ALIGNED_ARRAY(16, uint16_t, count, 16 * 16 * 3);
MACROBLOCKD *mbd = &cpi->mb.e_mbd;
YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
uint8_t *dst1, *dst2;
- DECLARE_ALIGNED_ARRAY(16, uint8_t, predictor, 16 * 16 + 8 * 8 + 8 * 8);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, predictor, 16 * 16 * 3);
+ const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
// Save input state
uint8_t* input_buffer[MAX_MB_PLANE];
int i;
+ // TODO(aconverse): Add 4:2:2 support
+ assert(mbd->plane[1].subsampling_x == mbd->plane[1].subsampling_y);
+
for (i = 0; i < MAX_MB_PLANE; i++)
input_buffer[i] = mbd->plane[i].pre[0].buf;
@@ -233,8 +246,8 @@
int i, j, k;
int stride;
- vpx_memset(accumulator, 0, 384 * sizeof(unsigned int));
- vpx_memset(count, 0, 384 * sizeof(uint16_t));
+ vpx_memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
+ vpx_memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
#if ALT_REF_MC_ENABLED
cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
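The magic number 384 in the old memsets was the packed 4:2:0 total; the
replacement clears the full worst-case allocation and takes element
sizes from the arrays themselves, so the memsets can no longer drift
out of sync with the declarations. The arithmetic, for the record:

    enum {
      OLD_SCRATCH = 16 * 16 + 8 * 8 + 8 * 8,  /* 384, 4:2:0 only   */
      NEW_SCRATCH = 16 * 16 * 3               /* 768, covers 4:4:4 */
    };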
@@ -280,6 +293,7 @@
cpi->frames[frame]->u_buffer + mb_uv_offset,
cpi->frames[frame]->v_buffer + mb_uv_offset,
cpi->frames[frame]->y_stride,
+ mb_uv_height,
mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row,
mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col,
predictor, scale);
@@ -290,12 +304,14 @@
accumulator, count);
vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
- predictor + 256, 8, strength, filter_weight,
- accumulator + 256, count + 256);
+ predictor + 256, mb_uv_height, strength,
+ filter_weight, accumulator + 256,
+ count + 256);
vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + 320, 8, strength, filter_weight,
- accumulator + 320, count + 320);
+ predictor + 512, mb_uv_height, strength,
+ filter_weight, accumulator + 512,
+ count + 512);
}
}
@@ -322,9 +338,9 @@
dst2 = cpi->alt_ref_buffer.v_buffer;
stride = cpi->alt_ref_buffer.uv_stride;
byte = mb_uv_offset;
- for (i = 0, k = 256; i < 8; i++) {
- for (j = 0; j < 8; j++, k++) {
- int m = k + 64;
+ for (i = 0, k = 256; i < mb_uv_height; i++) {
+ for (j = 0; j < mb_uv_height; j++, k++) {
+ int m = k + 256;
// U
unsigned int pval = accumulator[k] + (count[k] >> 1);
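For reference, the indexing change just above: k walks the U region
starting at 256, and the matching V sample now sits one full 16*16
region later instead of one packed 8*8 region later. A one-line sketch
(v_index_for is a hypothetical name):

    static int v_index_for(int k) {
      return k + 16 * 16;  /* was k + 8 * 8 with packed 4:2:0 regions */
    }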
@@ -342,15 +358,15 @@
byte++;
}
- byte += stride - 8;
+ byte += stride - mb_uv_height;
}
mb_y_offset += 16;
- mb_uv_offset += 8;
+ mb_uv_offset += mb_uv_height;
}
mb_y_offset += 16 * (f->y_stride - mb_cols);
- mb_uv_offset += 8 * (f->uv_stride - mb_cols);
+ mb_uv_offset += mb_uv_height * (f->uv_stride - mb_cols);
}
// Restore input state