shithub: libvpx

--- a/vp9/common/vp9_loopfilter.c

+++ b/vp9/common/vp9_loopfilter.c

@@ -298,97 +298,83 @@

 static void filter_selectively_vert_row2(int subsampling_factor,

                                          uint8_t *s, int pitch,

-                                         unsigned int mask_16x16_l,

-                                         unsigned int mask_8x8_l,

-                                         unsigned int mask_4x4_l,

-                                         unsigned int mask_4x4_int_l,

-                                         const loop_filter_info_n *lfi_n,

+                                         unsigned int mask_16x16,

+                                         unsigned int mask_8x8,

+                                         unsigned int mask_4x4,

+                                         unsigned int mask_4x4_int,

+                                         const loop_filter_thresh *lfthr,

                                          const uint8_t *lfl) {

-  const int mask_shift = subsampling_factor ? 4 : 8;

-  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;

+  const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;

   const int lfl_forward = subsampling_factor ? 4 : 8;

-  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;

-  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;

-  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;

-  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;

-  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;

-  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;

-  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;

-  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;

+  const unsigned int dual_one = 1 | (1 << lfl_forward);

   unsigned int mask;

+  uint8_t *ss[2];

+  ss[0] = s;

-  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |

-              mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;

-       mask; mask >>= 1) {

-    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;

-    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);

+  for (mask =

+           (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;

+       mask; mask = (mask & ~dual_one) >> 1) {

+    if (mask & dual_one) {

+      const loop_filter_thresh *lfis[2];

+      lfis[0] = lfthr + *lfl;

+      lfis[1] = lfthr + *(lfl + lfl_forward);

+      ss[1] = ss[0] + 8 * pitch;

-    if (mask & 1) {

-      if ((mask_16x16_0 | mask_16x16_1) & 1) {

-        if ((mask_16x16_0 & mask_16x16_1) & 1) {

-          vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,

-                                   lfi0->hev_thr);

-        } else if (mask_16x16_0 & 1) {

-          vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,

-                              lfi0->hev_thr);

+      if (mask_16x16 & dual_one) {

+        if ((mask_16x16 & dual_one) == dual_one) {

+          vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,

+                                   lfis[0]->hev_thr);

         } else {

-          vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,

-                              lfi1->lim, lfi1->hev_thr);

+          const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];

+          vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,

+                              lfi->lim, lfi->hev_thr);

-      if ((mask_8x8_0 | mask_8x8_1) & 1) {

-        if ((mask_8x8_0 & mask_8x8_1) & 1) {

-          vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,

-                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,

-                                  lfi1->hev_thr);

-        } else if (mask_8x8_0 & 1) {

-          vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);

+      if (mask_8x8 & dual_one) {

+        if ((mask_8x8 & dual_one) == dual_one) {

+          vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,

+                                  lfis[0]->hev_thr, lfis[1]->mblim,

+                                  lfis[1]->lim, lfis[1]->hev_thr);

         } else {

-          vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,

-                             lfi1->hev_thr);

+          const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];

+          vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim,

+                             lfi->hev_thr);

-      if ((mask_4x4_0 | mask_4x4_1) & 1) {

-        if ((mask_4x4_0 & mask_4x4_1) & 1) {

-          vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,

-                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,

-                                  lfi1->hev_thr);

-        } else if (mask_4x4_0 & 1) {

-          vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);

+      if (mask_4x4 & dual_one) {

+        if ((mask_4x4 & dual_one) == dual_one) {

+          vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,

+                                  lfis[0]->hev_thr, lfis[1]->mblim,

+                                  lfis[1]->lim, lfis[1]->hev_thr);

         } else {

-          vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,

-                             lfi1->hev_thr);

+          const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];

+          vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim,

+                             lfi->hev_thr);

-      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {

-        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {

-          vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,

-                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,

-                                  lfi1->hev_thr);

-        } else if (mask_4x4_int_0 & 1) {

-          vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,

-                             lfi0->hev_thr);

+      if (mask_4x4_int & dual_one) {

+        if ((mask_4x4_int & dual_one) == dual_one) {

+          vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,

+                                  lfis[0]->lim, lfis[0]->hev_thr,

+                                  lfis[1]->mblim, lfis[1]->lim,

+                                  lfis[1]->hev_thr);

         } else {

-          vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,

-                             lfi1->hev_thr);

+          const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];

+          vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim,

+                             lfi->lim, lfi->hev_thr);

-    s += 8;

+    ss[0] += 8;

     lfl += 1;

-    mask_16x16_0 >>= 1;

-    mask_8x8_0 >>= 1;

-    mask_4x4_0 >>= 1;

-    mask_4x4_int_0 >>= 1;

-    mask_16x16_1 >>= 1;

-    mask_8x8_1 >>= 1;

-    mask_4x4_1 >>= 1;

-    mask_4x4_int_1 >>= 1;

+    mask_16x16 >>= 1;

+    mask_8x8 >>= 1;

+    mask_4x4 >>= 1;

+    mask_4x4_int >>= 1;

@@ -395,99 +381,85 @@

 #if CONFIG_VP9_HIGHBITDEPTH

 static void highbd_filter_selectively_vert_row2(int subsampling_factor,

                                                 uint16_t *s, int pitch,

-                                                unsigned int mask_16x16_l,

-                                                unsigned int mask_8x8_l,

-                                                unsigned int mask_4x4_l,

-                                                unsigned int mask_4x4_int_l,

-                                                const loop_filter_info_n *lfi_n,

+                                                unsigned int mask_16x16,

+                                                unsigned int mask_8x8,

+                                                unsigned int mask_4x4,

+                                                unsigned int mask_4x4_int,

+                                                const loop_filter_thresh *lfthr,

                                                 const uint8_t *lfl, int bd) {

-  const int mask_shift = subsampling_factor ? 4 : 8;

-  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;

+  const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;

   const int lfl_forward = subsampling_factor ? 4 : 8;

-  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;

-  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;

-  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;

-  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;

-  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;

-  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;

-  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;

-  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;

+  const unsigned int dual_one = 1 | (1 << lfl_forward);

   unsigned int mask;

+  uint16_t *ss[2];

+  ss[0] = s;

-  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |

-       mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;

-       mask; mask >>= 1) {

-    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;

-    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);

+  for (mask =

+           (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;

+       mask; mask = (mask & ~dual_one) >> 1) {

+    if (mask & dual_one) {

+      const loop_filter_thresh *lfis[2];

+      lfis[0] = lfthr + *lfl;

+      lfis[1] = lfthr + *(lfl + lfl_forward);

+      ss[1] = ss[0] + 8 * pitch;

-    if (mask & 1) {

-      if ((mask_16x16_0 | mask_16x16_1) & 1) {

-        if ((mask_16x16_0 & mask_16x16_1) & 1) {

-          vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,

-                                          lfi0->hev_thr, bd);

-        } else if (mask_16x16_0 & 1) {

-          vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,

-                                     lfi0->hev_thr, bd);

+      if (mask_16x16 & dual_one) {

+        if ((mask_16x16 & dual_one) == dual_one) {

+          vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim,

+                                          lfis[0]->lim, lfis[0]->hev_thr, bd);

         } else {

-          vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,

-                                     lfi1->lim, lfi1->hev_thr, bd);

+          const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];

+          vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,

+                                     lfi->lim, lfi->hev_thr, bd);

-      if ((mask_8x8_0 | mask_8x8_1) & 1) {

-        if ((mask_8x8_0 & mask_8x8_1) & 1) {

-          vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,

-                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,

-                                         lfi1->hev_thr, bd);

-        } else if (mask_8x8_0 & 1) {

-          vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,

-                                    lfi0->hev_thr, bd);

+      if (mask_8x8 & dual_one) {

+        if ((mask_8x8 & dual_one) == dual_one) {

+          vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim,

+                                         lfis[0]->lim, lfis[0]->hev_thr,

+                                         lfis[1]->mblim, lfis[1]->lim,

+                                         lfis[1]->hev_thr, bd);

         } else {

-          vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,

-                                    lfi1->lim, lfi1->hev_thr, bd);

+          const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];

+          vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim,

+                                    lfi->lim, lfi->hev_thr, bd);

-      if ((mask_4x4_0 | mask_4x4_1) & 1) {

-        if ((mask_4x4_0 & mask_4x4_1) & 1) {

-          vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,

-                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,

-                                         lfi1->hev_thr, bd);

-        } else if (mask_4x4_0 & 1) {

-          vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,

-                                    lfi0->hev_thr, bd);

+      if (mask_4x4 & dual_one) {

+        if ((mask_4x4 & dual_one) == dual_one) {

+          vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim,

+                                         lfis[0]->lim, lfis[0]->hev_thr,

+                                         lfis[1]->mblim, lfis[1]->lim,

+                                         lfis[1]->hev_thr, bd);

         } else {

-          vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,

-                                    lfi1->lim, lfi1->hev_thr, bd);

+          const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];

+          vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim,

+                                    lfi->lim, lfi->hev_thr, bd);

-      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {

-        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {

-          vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,

-                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,

-                                         lfi1->hev_thr, bd);

-        } else if (mask_4x4_int_0 & 1) {

-          vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,

-                                    lfi0->hev_thr, bd);

+      if (mask_4x4_int & dual_one) {

+        if ((mask_4x4_int & dual_one) == dual_one) {

+          vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,

+                                         lfis[0]->lim, lfis[0]->hev_thr,

+                                         lfis[1]->mblim, lfis[1]->lim,

+                                         lfis[1]->hev_thr, bd);

         } else {

-          vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,

-                                    lfi1->lim, lfi1->hev_thr, bd);

+          const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];

+          vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch,

+                                    lfi->mblim, lfi->lim, lfi->hev_thr, bd);

-    s += 8;

+    ss[0] += 8;

     lfl += 1;

-    mask_16x16_0 >>= 1;

-    mask_8x8_0 >>= 1;

-    mask_4x4_0 >>= 1;

-    mask_4x4_int_0 >>= 1;

-    mask_16x16_1 >>= 1;

-    mask_8x8_1 >>= 1;

-    mask_4x4_1 >>= 1;

-    mask_4x4_int_1 >>= 1;

+    mask_16x16 >>= 1;

+    mask_8x8 >>= 1;

+    mask_4x4 >>= 1;

+    mask_4x4_int >>= 1;

 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -497,7 +469,7 @@

                                      unsigned int mask_8x8,

                                      unsigned int mask_4x4,

                                      unsigned int mask_4x4_int,

-                                     const loop_filter_info_n *lfi_n,

+                                     const loop_filter_thresh *lfthr,

                                      const uint8_t *lfl) {

   unsigned int mask;

   int count;

@@ -504,10 +476,10 @@

   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;

        mask; mask >>= count) {

-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

     count = 1;

     if (mask & 1) {

+      const loop_filter_thresh *lfi = lfthr + *lfl;

       if (mask_16x16 & 1) {

         if ((mask_16x16 & 3) == 3) {

           vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,

@@ -520,7 +492,7 @@

       } else if (mask_8x8 & 1) {

         if ((mask_8x8 & 3) == 3) {

           // Next block's thresholds.

-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);

           vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,

                                     lfi->hev_thr, lfin->mblim, lfin->lim,

@@ -549,7 +521,7 @@

       } else if (mask_4x4 & 1) {

         if ((mask_4x4 & 3) == 3) {

           // Next block's thresholds.

-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);

           vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,

                                     lfi->hev_thr, lfin->mblim, lfin->lim,

@@ -574,7 +546,7 @@

             vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,

                                  lfi->hev_thr);

-      } else if (mask_4x4_int & 1) {

+      } else {

         vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,

                              lfi->hev_thr);

@@ -594,7 +566,7 @@

                                             unsigned int mask_8x8,

                                             unsigned int mask_4x4,

                                             unsigned int mask_4x4_int,

-                                            const loop_filter_info_n *lfi_n,

+                                            const loop_filter_thresh *lfthr,

                                             const uint8_t *lfl, int bd) {

   unsigned int mask;

   int count;

@@ -601,10 +573,10 @@

   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;

        mask; mask >>= count) {

-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

     count = 1;

     if (mask & 1) {

+      const loop_filter_thresh *lfi = lfthr + *lfl;

       if (mask_16x16 & 1) {

         if ((mask_16x16 & 3) == 3) {

           vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,

@@ -617,7 +589,7 @@

       } else if (mask_8x8 & 1) {

         if ((mask_8x8 & 3) == 3) {

           // Next block's thresholds.

-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);

           vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,

                                            lfi->hev_thr, lfin->mblim, lfin->lim,

@@ -650,7 +622,7 @@

       } else if (mask_4x4 & 1) {

         if ((mask_4x4 & 3) == 3) {

           // Next block's thresholds.

-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);

           vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,

                                            lfi->hev_thr, lfin->mblim, lfin->lim,

@@ -679,7 +651,7 @@

                                         lfi->lim, lfi->hev_thr, bd);

-      } else if (mask_4x4_int & 1) {

+      } else {

         vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,

                                     lfi->hev_thr, bd);

@@ -1079,13 +1051,13 @@

                                     unsigned int mask_8x8,

                                     unsigned int mask_4x4,

                                     unsigned int mask_4x4_int,

-                                    const loop_filter_info_n *lfi_n,

+                                    const loop_filter_thresh *lfthr,

                                     const uint8_t *lfl) {

   unsigned int mask;

   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;

        mask; mask >>= 1) {

-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

+    const loop_filter_thresh *lfi = lfthr + *lfl;

     if (mask & 1) {

       if (mask_16x16 & 1) {

@@ -1113,13 +1085,13 @@

                                            unsigned int mask_8x8,

                                            unsigned int mask_4x4,

                                            unsigned int mask_4x4_int,

-                                           const loop_filter_info_n *lfi_n,

+                                           const loop_filter_thresh *lfthr,

                                            const uint8_t *lfl, int bd) {

   unsigned int mask;

   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;

        mask; mask >>= 1) {

-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

+    const loop_filter_thresh *lfi = lfthr + *lfl;

     if (mask & 1) {

       if (mask_16x16 & 1) {

@@ -1250,23 +1222,18 @@

                                      mask_8x8_c & border_mask,

                                      mask_4x4_c & border_mask,

                                      mask_4x4_int[r],

-                                     &cm->lf_info, &lfl[r << 3],

+                                     cm->lf_info.lfthr, &lfl[r << 3],

                                      (int)cm->bit_depth);

     } else {

+#endif  // CONFIG_VP9_HIGHBITDEPTH

       filter_selectively_vert(dst->buf, dst->stride,

                               mask_16x16_c & border_mask,

                               mask_8x8_c & border_mask,

                               mask_4x4_c & border_mask,

                               mask_4x4_int[r],

-                              &cm->lf_info, &lfl[r << 3]);

+                              cm->lf_info.lfthr, &lfl[r << 3]);

+#if CONFIG_VP9_HIGHBITDEPTH

-#else

-    filter_selectively_vert(dst->buf, dst->stride,

-                            mask_16x16_c & border_mask,

-                            mask_8x8_c & border_mask,

-                            mask_4x4_c & border_mask,

-                            mask_4x4_int[r],

-                            &cm->lf_info, &lfl[r << 3]);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

     dst->buf += 8 * dst->stride;

     mi_8x8 += row_step_stride;

@@ -1299,23 +1266,18 @@

                                       mask_8x8_r,

                                       mask_4x4_r,

                                       mask_4x4_int_r,

-                                      &cm->lf_info, &lfl[r << 3],

+                                      cm->lf_info.lfthr, &lfl[r << 3],

                                       (int)cm->bit_depth);

     } else {

+#endif  // CONFIG_VP9_HIGHBITDEPTH

       filter_selectively_horiz(dst->buf, dst->stride,

                                mask_16x16_r,

                                mask_8x8_r,

                                mask_4x4_r,

                                mask_4x4_int_r,

-                               &cm->lf_info, &lfl[r << 3]);

+                               cm->lf_info.lfthr, &lfl[r << 3]);

+#if CONFIG_VP9_HIGHBITDEPTH

-#else

-    filter_selectively_horiz(dst->buf, dst->stride,

-                             mask_16x16_r,

-                             mask_8x8_r,

-                             mask_4x4_r,

-                             mask_4x4_int_r,

-                             &cm->lf_info, &lfl[r << 3]);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

     dst->buf += 8 * dst->stride;

@@ -1337,27 +1299,20 @@

   // Vertical pass: do 2 rows at one time

   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {

-    unsigned int mask_16x16_l = mask_16x16 & 0xffff;

-    unsigned int mask_8x8_l = mask_8x8 & 0xffff;

-    unsigned int mask_4x4_l = mask_4x4 & 0xffff;

-    unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;

-// Disable filtering on the leftmost column.

+    // Disable filtering on the leftmost column.

 #if CONFIG_VP9_HIGHBITDEPTH

     if (cm->use_highbitdepth) {

       highbd_filter_selectively_vert_row2(

           plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,

-          mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,

+          mask_16x16, mask_8x8, mask_4x4, mask_4x4_int, cm->lf_info.lfthr,

           &lfm->lfl_y[r << 3], (int)cm->bit_depth);

     } else {

+#endif  // CONFIG_VP9_HIGHBITDEPTH

       filter_selectively_vert_row2(

-          plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,

-          mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);

+          plane->subsampling_x, dst->buf, dst->stride, mask_16x16, mask_8x8,

+          mask_4x4, mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);

+#if CONFIG_VP9_HIGHBITDEPTH

-#else

-    filter_selectively_vert_row2(

-        plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,

-        mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

     dst->buf += 16 * dst->stride;

     mask_16x16 >>= 16;

@@ -1390,19 +1345,18 @@

 #if CONFIG_VP9_HIGHBITDEPTH

     if (cm->use_highbitdepth) {

-      highbd_filter_selectively_horiz(

-          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,

-          mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],

-          (int)cm->bit_depth);

+      highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),

+                                      dst->stride, mask_16x16_r, mask_8x8_r,

+                                      mask_4x4_r, mask_4x4_int & 0xff,

+                                      cm->lf_info.lfthr, &lfm->lfl_y[r << 3],

+                                      (int)cm->bit_depth);

     } else {

+#endif  // CONFIG_VP9_HIGHBITDEPTH

       filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,

-                               mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,

-                               &lfm->lfl_y[r << 3]);

+                               mask_4x4_r, mask_4x4_int & 0xff,

+                               cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);

+#if CONFIG_VP9_HIGHBITDEPTH

-#else

-    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,

-                             mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,

-                             &lfm->lfl_y[r << 3]);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

     dst->buf += 8 * dst->stride;

@@ -1436,38 +1390,29 @@

       lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];

-    {

-      unsigned int mask_16x16_l = mask_16x16 & 0xff;

-      unsigned int mask_8x8_l = mask_8x8 & 0xff;

-      unsigned int mask_4x4_l = mask_4x4 & 0xff;

-      unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;

-// Disable filtering on the leftmost column.

+    // Disable filtering on the leftmost column.

 #if CONFIG_VP9_HIGHBITDEPTH

-      if (cm->use_highbitdepth) {

-        highbd_filter_selectively_vert_row2(

-            plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,

-            mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,

-            &lfl_uv[r << 1], (int)cm->bit_depth);

-      } else {

-        filter_selectively_vert_row2(

-            plane->subsampling_x, dst->buf, dst->stride,

-            mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,

-            &lfl_uv[r << 1]);

-      }

-#else

-      filter_selectively_vert_row2(

-          plane->subsampling_x, dst->buf, dst->stride,

-          mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,

-          &lfl_uv[r << 1]);

+    if (cm->use_highbitdepth) {

+      highbd_filter_selectively_vert_row2(plane->subsampling_x,

+                                          CONVERT_TO_SHORTPTR(dst->buf),

+                                          dst->stride, mask_16x16, mask_8x8,

+                                          mask_4x4, mask_4x4_int,

+                                          cm->lf_info.lfthr, &lfl_uv[r << 1],

+                                          (int)cm->bit_depth);

+    } else {

 #endif  // CONFIG_VP9_HIGHBITDEPTH

-      dst->buf += 16 * dst->stride;

-      mask_16x16 >>= 8;

-      mask_8x8 >>= 8;

-      mask_4x4 >>= 8;

-      mask_4x4_int >>= 8;

+      filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,

+                                   mask_16x16, mask_8x8, mask_4x4, mask_4x4_int,

+                                   cm->lf_info.lfthr, &lfl_uv[r << 1]);

+#if CONFIG_VP9_HIGHBITDEPTH

+#endif  // CONFIG_VP9_HIGHBITDEPTH

+    dst->buf += 16 * dst->stride;

+    mask_16x16 >>= 8;

+    mask_8x8 >>= 8;

+    mask_4x4 >>= 8;

+    mask_4x4_int >>= 8;

   // Horizontal pass

@@ -1499,17 +1444,16 @@

     if (cm->use_highbitdepth) {

       highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),

                                       dst->stride, mask_16x16_r, mask_8x8_r,

-                                      mask_4x4_r, mask_4x4_int_r, &cm->lf_info,

-                                      &lfl_uv[r << 1], (int)cm->bit_depth);

+                                      mask_4x4_r, mask_4x4_int_r,

+                                      cm->lf_info.lfthr, &lfl_uv[r << 1],

+                                      (int)cm->bit_depth);

     } else {

+#endif  // CONFIG_VP9_HIGHBITDEPTH

       filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,

-                               mask_4x4_r, mask_4x4_int_r, &cm->lf_info,

+                               mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,

                                &lfl_uv[r << 1]);

+#if CONFIG_VP9_HIGHBITDEPTH

-#else

-    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,

-                             mask_4x4_r, mask_4x4_int_r, &cm->lf_info,

-                             &lfl_uv[r << 1]);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

     dst->buf += 8 * dst->stride;