ref: b26232eb1b2e7253687770b9b5b76a2d465e70bb
parent: f80d8011a0141a28d8e4a02720483bb6bcb09c6b
author: Linfeng Zhang <[email protected]>
date: Tue May 31 06:38:01 EDT 2016
Update filter_selectively_vert_row2(): reduce operations and jumps. perf shows CPU time reduced from 1.9% to 1.6% when decoding fdJc1_IBKJA.248.webm on Xeon E5. Will apply the changes to vp10 after code review. Change-Id: I9351509922855d8896ddef1ed093b3ca12619a61
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -298,97 +298,83 @@
static void filter_selectively_vert_row2(int subsampling_factor,
uint8_t *s, int pitch,
- unsigned int mask_16x16_l,
- unsigned int mask_8x8_l,
- unsigned int mask_4x4_l,
- unsigned int mask_4x4_int_l,
- const loop_filter_info_n *lfi_n,
+ unsigned int mask_16x16,
+ unsigned int mask_8x8,
+ unsigned int mask_4x4,
+ unsigned int mask_4x4_int,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
- const int mask_shift = subsampling_factor ? 4 : 8;
- const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+ const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
const int lfl_forward = subsampling_factor ? 4 : 8;
-
- unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
- unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
- unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
- unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
- unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
- unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+ const unsigned int dual_one = 1 | (1 << lfl_forward);
unsigned int mask;
+ uint8_t *ss[2];
+ ss[0] = s;
- for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
- mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
- mask; mask >>= 1) {
- const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
- const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
+ for (mask =
+ (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
+ mask; mask = (mask & ~dual_one) >> 1) {
+ if (mask & dual_one) {
+ const loop_filter_thresh *lfis[2];
+ lfis[0] = lfthr + *lfl;
+ lfis[1] = lfthr + *(lfl + lfl_forward);
+ ss[1] = ss[0] + 8 * pitch;
- if (mask & 1) {
- if ((mask_16x16_0 | mask_16x16_1) & 1) {
- if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
- } else if (mask_16x16_0 & 1) {
- vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
+ if (mask_16x16 & dual_one) {
+ if ((mask_16x16 & dual_one) == dual_one) {
+ vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+ lfis[0]->hev_thr);
} else {
- vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr);
+ const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
+ vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr);
}
}
- if ((mask_8x8_0 | mask_8x8_1) & 1) {
- if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
- } else if (mask_8x8_0 & 1) {
- vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+ if (mask_8x8 & dual_one) {
+ if ((mask_8x8 & dual_one) == dual_one) {
+ vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+ lfis[0]->hev_thr, lfis[1]->mblim,
+ lfis[1]->lim, lfis[1]->hev_thr);
} else {
- vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
+ const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
+ vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
}
}
- if ((mask_4x4_0 | mask_4x4_1) & 1) {
- if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
- } else if (mask_4x4_0 & 1) {
- vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+ if (mask_4x4 & dual_one) {
+ if ((mask_4x4 & dual_one) == dual_one) {
+ vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+ lfis[0]->hev_thr, lfis[1]->mblim,
+ lfis[1]->lim, lfis[1]->hev_thr);
} else {
- vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
+ const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
+ vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
}
}
- if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
- if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
- } else if (mask_4x4_int_0 & 1) {
- vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
+ if (mask_4x4_int & dual_one) {
+ if ((mask_4x4_int & dual_one) == dual_one) {
+ vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
+ lfis[0]->lim, lfis[0]->hev_thr,
+ lfis[1]->mblim, lfis[1]->lim,
+ lfis[1]->hev_thr);
} else {
- vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
+ const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
+ vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr);
}
}
}
- s += 8;
+ ss[0] += 8;
lfl += 1;
- mask_16x16_0 >>= 1;
- mask_8x8_0 >>= 1;
- mask_4x4_0 >>= 1;
- mask_4x4_int_0 >>= 1;
- mask_16x16_1 >>= 1;
- mask_8x8_1 >>= 1;
- mask_4x4_1 >>= 1;
- mask_4x4_int_1 >>= 1;
+ mask_16x16 >>= 1;
+ mask_8x8 >>= 1;
+ mask_4x4 >>= 1;
+ mask_4x4_int >>= 1;
}
}
@@ -395,99 +381,85 @@
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_filter_selectively_vert_row2(int subsampling_factor,
uint16_t *s, int pitch,
- unsigned int mask_16x16_l,
- unsigned int mask_8x8_l,
- unsigned int mask_4x4_l,
- unsigned int mask_4x4_int_l,
- const loop_filter_info_n *lfi_n,
+ unsigned int mask_16x16,
+ unsigned int mask_8x8,
+ unsigned int mask_4x4,
+ unsigned int mask_4x4_int,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
- const int mask_shift = subsampling_factor ? 4 : 8;
- const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+ const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
const int lfl_forward = subsampling_factor ? 4 : 8;
-
- unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
- unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
- unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
- unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
- unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
- unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+ const unsigned int dual_one = 1 | (1 << lfl_forward);
unsigned int mask;
+ uint16_t *ss[2];
+ ss[0] = s;
- for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
- mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
- mask; mask >>= 1) {
- const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
- const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
+ for (mask =
+ (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
+ mask; mask = (mask & ~dual_one) >> 1) {
+ if (mask & dual_one) {
+ const loop_filter_thresh *lfis[2];
+ lfis[0] = lfthr + *lfl;
+ lfis[1] = lfthr + *(lfl + lfl_forward);
+ ss[1] = ss[0] + 8 * pitch;
- if (mask & 1) {
- if ((mask_16x16_0 | mask_16x16_1) & 1) {
- if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
- } else if (mask_16x16_0 & 1) {
- vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
+ if (mask_16x16 & dual_one) {
+ if ((mask_16x16 & dual_one) == dual_one) {
+ vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim,
+ lfis[0]->lim, lfis[0]->hev_thr, bd);
} else {
- vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, bd);
+ const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
+ vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, bd);
}
}
- if ((mask_8x8_0 | mask_8x8_1) & 1) {
- if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, bd);
- } else if (mask_8x8_0 & 1) {
- vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
+ if (mask_8x8 & dual_one) {
+ if ((mask_8x8 & dual_one) == dual_one) {
+ vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim,
+ lfis[0]->lim, lfis[0]->hev_thr,
+ lfis[1]->mblim, lfis[1]->lim,
+ lfis[1]->hev_thr, bd);
} else {
- vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, bd);
+ const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
+ vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, bd);
}
}
- if ((mask_4x4_0 | mask_4x4_1) & 1) {
- if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, bd);
- } else if (mask_4x4_0 & 1) {
- vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
+ if (mask_4x4 & dual_one) {
+ if ((mask_4x4 & dual_one) == dual_one) {
+ vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim,
+ lfis[0]->lim, lfis[0]->hev_thr,
+ lfis[1]->mblim, lfis[1]->lim,
+ lfis[1]->hev_thr, bd);
} else {
- vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, bd);
+ const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
+ vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, bd);
}
}
- if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
- if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, bd);
- } else if (mask_4x4_int_0 & 1) {
- vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
+ if (mask_4x4_int & dual_one) {
+ if ((mask_4x4_int & dual_one) == dual_one) {
+ vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
+ lfis[0]->lim, lfis[0]->hev_thr,
+ lfis[1]->mblim, lfis[1]->lim,
+ lfis[1]->hev_thr, bd);
} else {
- vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, bd);
+ const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
+ vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch,
+ lfi->mblim, lfi->lim, lfi->hev_thr, bd);
}
}
}
- s += 8;
+ ss[0] += 8;
lfl += 1;
- mask_16x16_0 >>= 1;
- mask_8x8_0 >>= 1;
- mask_4x4_0 >>= 1;
- mask_4x4_int_0 >>= 1;
- mask_16x16_1 >>= 1;
- mask_8x8_1 >>= 1;
- mask_4x4_1 >>= 1;
- mask_4x4_int_1 >>= 1;
+ mask_16x16 >>= 1;
+ mask_8x8 >>= 1;
+ mask_4x4 >>= 1;
+ mask_4x4_int >>= 1;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -497,7 +469,7 @@
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
unsigned int mask;
int count;
@@ -504,10 +476,10 @@
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= count) {
- const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
-
count = 1;
if (mask & 1) {
+ const loop_filter_thresh *lfi = lfthr + *lfl;
+
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
@@ -520,7 +492,7 @@
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -549,7 +521,7 @@
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -574,7 +546,7 @@
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
}
- } else if (mask_4x4_int & 1) {
+ } else {
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
}
@@ -594,7 +566,7 @@
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
unsigned int mask;
int count;
@@ -601,10 +573,10 @@
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= count) {
- const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
-
count = 1;
if (mask & 1) {
+ const loop_filter_thresh *lfi = lfthr + *lfl;
+
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
@@ -617,7 +589,7 @@
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -650,7 +622,7 @@
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -679,7 +651,7 @@
lfi->lim, lfi->hev_thr, bd);
}
}
- } else if (mask_4x4_int & 1) {
+ } else {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, bd);
}
@@ -1079,13 +1051,13 @@
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
unsigned int mask;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= 1) {
- const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+ const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask & 1) {
if (mask_16x16 & 1) {
@@ -1113,13 +1085,13 @@
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
unsigned int mask;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= 1) {
- const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+ const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask & 1) {
if (mask_16x16 & 1) {
@@ -1250,23 +1222,18 @@
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
- &cm->lf_info, &lfl[r << 3],
+ cm->lf_info.lfthr, &lfl[r << 3],
(int)cm->bit_depth);
} else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_vert(dst->buf, dst->stride,
mask_16x16_c & border_mask,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
- &cm->lf_info, &lfl[r << 3]);
+ cm->lf_info.lfthr, &lfl[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
-#else
- filter_selectively_vert(dst->buf, dst->stride,
- mask_16x16_c & border_mask,
- mask_8x8_c & border_mask,
- mask_4x4_c & border_mask,
- mask_4x4_int[r],
- &cm->lf_info, &lfl[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
mi_8x8 += row_step_stride;
@@ -1299,23 +1266,18 @@
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
- &cm->lf_info, &lfl[r << 3],
+ cm->lf_info.lfthr, &lfl[r << 3],
(int)cm->bit_depth);
} else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride,
mask_16x16_r,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
- &cm->lf_info, &lfl[r << 3]);
+ cm->lf_info.lfthr, &lfl[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
-#else
- filter_selectively_horiz(dst->buf, dst->stride,
- mask_16x16_r,
- mask_8x8_r,
- mask_4x4_r,
- mask_4x4_int_r,
- &cm->lf_info, &lfl[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
}
@@ -1337,27 +1299,20 @@
// Vertical pass: do 2 rows at one time
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
- unsigned int mask_16x16_l = mask_16x16 & 0xffff;
- unsigned int mask_8x8_l = mask_8x8 & 0xffff;
- unsigned int mask_4x4_l = mask_4x4 & 0xffff;
- unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
-
-// Disable filtering on the leftmost column.
+ // Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert_row2(
plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
- mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
+ mask_16x16, mask_8x8, mask_4x4, mask_4x4_int, cm->lf_info.lfthr,
&lfm->lfl_y[r << 3], (int)cm->bit_depth);
} else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_vert_row2(
- plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
- mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
+ plane->subsampling_x, dst->buf, dst->stride, mask_16x16, mask_8x8,
+ mask_4x4, mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
-#else
- filter_selectively_vert_row2(
- plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
- mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
mask_16x16 >>= 16;
@@ -1390,19 +1345,18 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- highbd_filter_selectively_horiz(
- CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],
- (int)cm->bit_depth);
+ highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
+ dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int & 0xff,
+ cm->lf_info.lfthr, &lfm->lfl_y[r << 3],
+ (int)cm->bit_depth);
} else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
- &lfm->lfl_y[r << 3]);
+ mask_4x4_r, mask_4x4_int & 0xff,
+ cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
-#else
- filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
- &lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
@@ -1436,38 +1390,29 @@
lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
}
- {
- unsigned int mask_16x16_l = mask_16x16 & 0xff;
- unsigned int mask_8x8_l = mask_8x8 & 0xff;
- unsigned int mask_4x4_l = mask_4x4 & 0xff;
- unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
-
-// Disable filtering on the leftmost column.
+ // Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- highbd_filter_selectively_vert_row2(
- plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
- mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfl_uv[r << 1], (int)cm->bit_depth);
- } else {
- filter_selectively_vert_row2(
- plane->subsampling_x, dst->buf, dst->stride,
- mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfl_uv[r << 1]);
- }
-#else
- filter_selectively_vert_row2(
- plane->subsampling_x, dst->buf, dst->stride,
- mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfl_uv[r << 1]);
+ if (cm->use_highbitdepth) {
+ highbd_filter_selectively_vert_row2(plane->subsampling_x,
+ CONVERT_TO_SHORTPTR(dst->buf),
+ dst->stride, mask_16x16, mask_8x8,
+ mask_4x4, mask_4x4_int,
+ cm->lf_info.lfthr, &lfl_uv[r << 1],
+ (int)cm->bit_depth);
+ } else {
#endif // CONFIG_VP9_HIGHBITDEPTH
-
- dst->buf += 16 * dst->stride;
- mask_16x16 >>= 8;
- mask_8x8 >>= 8;
- mask_4x4 >>= 8;
- mask_4x4_int >>= 8;
+ filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
+ mask_16x16, mask_8x8, mask_4x4, mask_4x4_int,
+ cm->lf_info.lfthr, &lfl_uv[r << 1]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ dst->buf += 16 * dst->stride;
+ mask_16x16 >>= 8;
+ mask_8x8 >>= 8;
+ mask_4x4 >>= 8;
+ mask_4x4_int >>= 8;
}
// Horizontal pass
@@ -1499,17 +1444,16 @@
if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
- &lfl_uv[r << 1], (int)cm->bit_depth);
+ mask_4x4_r, mask_4x4_int_r,
+ cm->lf_info.lfthr, &lfl_uv[r << 1],
+ (int)cm->bit_depth);
} else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+ mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,
&lfl_uv[r << 1]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
-#else
- filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
- &lfl_uv[r << 1]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;