ref: 9cafc46d9ea9606d6a5cfb1948331db2aca6a39f
parent: 082434b274d98671cb63b7337062700b79c08313
parent: 965a8dea0bab30e2003e70305ca70abb09b7845d
author: Debargha Mukherjee <[email protected]>
date: Fri Oct 30 16:57:10 EDT 2015
Merge "Convert motion search config from AoS to SoA"
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -103,17 +103,17 @@
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
int len, ss_count = 1;
- cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
- cfg->ss[0].offset = 0;
+ cfg->ss_mv[0].col = 0;
+ cfg->ss_mv[0].row = 0;
+ cfg->ss_os[0] = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 4 search sites per step.
const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
int i;
- for (i = 0; i < 4; ++i) {
- search_site *const ss = &cfg->ss[ss_count++];
- ss->mv = ss_mvs[i];
- ss->offset = ss->mv.row * stride + ss->mv.col;
+ for (i = 0; i < 4; ++i, ++ss_count) {
+ cfg->ss_mv[ss_count] = ss_mvs[i];
+ cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
}
}
@@ -124,8 +124,9 @@
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
int len, ss_count = 1;
- cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
- cfg->ss[0].offset = 0;
+ cfg->ss_mv[0].col = 0;
+ cfg->ss_mv[0].row = 0;
+ cfg->ss_os[0] = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 8 search sites per step.
@@ -134,10 +135,9 @@
{-len, -len}, {-len, len}, {len, -len}, {len, len}
};
int i;
- for (i = 0; i < 8; ++i) {
- search_site *const ss = &cfg->ss[ss_count++];
- ss->mv = ss_mvs[i];
- ss->offset = ss->mv.row * stride + ss->mv.col;
+ for (i = 0; i < 8; ++i, ++ss_count) {
+ cfg->ss_mv[ss_count] = ss_mvs[i];
+ cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
}
}
@@ -1623,7 +1623,9 @@
// 0 = initial step (MAX_FIRST_STEP) pel
// 1 = (MAX_FIRST_STEP/2) pel,
// 2 = (MAX_FIRST_STEP/4) pel...
- const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+ // Start of this step's entries in the parallel MV and offset tables.
+ const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
+ const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
@@ -1649,10 +1651,10 @@
// All_in is true if every one of the points we are checking are within
// the bounds of the image.
- all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
- all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
- all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
- all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
+ all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_row_min);
+ all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_row_max);
+ all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_col_min);
+ all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_col_max);
// If all the pixels are within the bounds we don't check whether the
// search point is valid in this loop, otherwise we check each point
@@ -1664,7 +1666,7 @@
unsigned char const *block_offset[4];
for (t = 0; t < 4; t++)
- block_offset[t] = ss[i + t].offset + best_address;
+ block_offset[t] = ss_os[i + t] + best_address;
fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
sad_array);
@@ -1671,8 +1673,8 @@
for (t = 0; t < 4; t++, i++) {
if (sad_array[t] < bestsad) {
- const MV this_mv = {best_mv->row + ss[i].mv.row,
- best_mv->col + ss[i].mv.col};
+ const MV this_mv = {best_mv->row + ss_mv[i].row,
+ best_mv->col + ss_mv[i].col};
sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
sad_per_bit);
if (sad_array[t] < bestsad) {
@@ -1685,11 +1687,11 @@
} else {
for (j = 0; j < cfg->searches_per_step; j++) {
// Trap illegal vectors
- const MV this_mv = {best_mv->row + ss[i].mv.row,
- best_mv->col + ss[i].mv.col};
+ const MV this_mv = {best_mv->row + ss_mv[i].row,
+ best_mv->col + ss_mv[i].col};
if (is_mv_in(x, &this_mv)) {
- const uint8_t *const check_here = ss[i].offset + best_address;
+ const uint8_t *const check_here = ss_os[i] + best_address;
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
in_what_stride);
@@ -1705,16 +1707,16 @@
}
}
if (best_site != last_site) {
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
+ best_mv->row += ss_mv[best_site].row;
+ best_mv->col += ss_mv[best_site].col;
+ best_address += ss_os[best_site];
last_site = best_site;
#if defined(NEW_DIAMOND_SEARCH)
while (1) {
- const MV this_mv = {best_mv->row + ss[best_site].mv.row,
- best_mv->col + ss[best_site].mv.col};
+ const MV this_mv = {best_mv->row + ss_mv[best_site].row,
+ best_mv->col + ss_mv[best_site].col};
if (is_mv_in(x, &this_mv)) {
- const uint8_t *const check_here = ss[best_site].offset + best_address;
+ const uint8_t *const check_here = ss_os[best_site] + best_address;
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
in_what_stride);
if (thissad < bestsad) {
@@ -1721,9 +1723,9 @@
thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
+ best_mv->row += ss_mv[best_site].row;
+ best_mv->col += ss_mv[best_site].col;
+ best_address += ss_os[best_site];
continue;
}
}
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -31,14 +31,10 @@
// for Block_16x16
#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
-// motion search site
-typedef struct search_site {
- MV mv;
- int offset;
-} search_site;
-
typedef struct search_site_config {
- search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
+ // Motion search sites; ss_mv[i] and ss_os[i] describe the same site.
+ MV ss_mv[8 * MAX_MVSEARCH_STEPS + 1]; // Motion vector (row, col)
+ intptr_t ss_os[8 * MAX_MVSEARCH_STEPS + 1]; // Offset: row * stride + col
int ss_count;
int searches_per_step;
} search_site_config;