ref: 548889c35ad177861dbec9fd9b14ac9bb070723a
parent: 0546636c74e9e0b5b7700053c6d12d7e0d9994b3
parent: d642294b1c57a5adacb1038ff45766c38bae8a6d
author: James Zern <[email protected]>
date: Tue Feb 16 15:26:41 EST 2016
Merge "Fix tsan error in VP9 sub8x8 intra mode search"
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3031,10 +3031,24 @@
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *mi;
+ ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
+ BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8); // processing unit block size
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
+ int plane;
+
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
mi = xd->mi[0];
mi->sb_type = bsize;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ struct macroblockd_plane *pd = &xd->plane[plane];
+ memcpy(a + num_4x4_blocks_wide * plane, pd->above_context,
+ (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
+ memcpy(l + num_4x4_blocks_high * plane, pd->left_context,
+ (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
+ }
+
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
if (cyclic_refresh_segment_id_boosted(mi->segment_id))
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
@@ -3051,6 +3065,14 @@
rd_cost, bsize, ctx);
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ struct macroblockd_plane *pd = &xd->plane[plane];
+ memcpy(pd->above_context, a + num_4x4_blocks_wide * plane,
+ (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
+ memcpy(pd->left_context, l + num_4x4_blocks_high * plane,
+ (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
+ }
if (rd_cost->rate == INT_MAX)
vp9_rd_cost_reset(rd_cost);
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -787,9 +787,9 @@
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t best_dst16[8 * 8];
#endif
+ memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
+ memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
- memcpy(ta, a, sizeof(ta));
- memcpy(tl, l, sizeof(tl));
xd->mi[0]->tx_size = TX_4X4;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -810,8 +810,8 @@
continue;
}
- memcpy(tempa, ta, sizeof(ta));
- memcpy(templ, tl, sizeof(tl));
+ memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
+ memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
@@ -874,8 +874,8 @@
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
- memcpy(a, tempa, sizeof(tempa));
- memcpy(l, templ, sizeof(templ));
+ memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
+ memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
memcpy(best_dst16 + idy * 8,
CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
@@ -914,8 +914,8 @@
continue;
}
- memcpy(tempa, ta, sizeof(ta));
- memcpy(templ, tl, sizeof(tl));
+ memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
+ memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
@@ -976,8 +976,8 @@
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
- memcpy(a, tempa, sizeof(tempa));
- memcpy(l, templ, sizeof(templ));
+ memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
+ memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
num_4x4_blocks_wide * 4);
@@ -1013,12 +1013,8 @@
int64_t total_distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
- ENTROPY_CONTEXT t_above[4], t_left[4];
const int *bmode_costs = cpi->mbmode_cost;
- memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
- memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
-
// Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -1034,8 +1030,11 @@
}
this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
- bmode_costs, t_above + idx, t_left + idy,
+ bmode_costs,
+ xd->plane[0].above_context + idx,
+ xd->plane[0].left_context + idy,
&r, &ry, &d, bsize, best_rd - total_rd);
+
if (this_rd >= best_rd - total_rd)
return INT64_MAX;