ref: 950b68288a4908037bd7b74c105724d813fc4eb1
parent: 798b54a006bf971e206555a563e218f0b57dcb5a
parent: 4e5e5fc52b9b8d4ba1a3a712dfea6418e1bd4fc9
author: Johann Koenig <[email protected]>
date: Tue Aug 18 19:08:51 EDT 2015
Merge "Rename vp8 loopfilter[_neon.c]"
--- a/vp8/common/arm/neon/loopfilter_neon.c
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-#include "./vpx_config.h"
-#include "vpx_ports/arm.h"
-
-static INLINE void vp8_loop_filter_neon(
- uint8x16_t qblimit, // flimit
- uint8x16_t qlimit, // limit
- uint8x16_t qthresh, // thresh
- uint8x16_t q3, // p3
- uint8x16_t q4, // p2
- uint8x16_t q5, // p1
- uint8x16_t q6, // p0
- uint8x16_t q7, // q0
- uint8x16_t q8, // q1
- uint8x16_t q9, // q2
- uint8x16_t q10, // q3
- uint8x16_t *q5r, // p1
- uint8x16_t *q6r, // p0
- uint8x16_t *q7r, // q0
- uint8x16_t *q8r) { // q1
- uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
- int16x8_t q2s16, q11s16;
- uint16x8_t q4u16;
- int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
- int8x8_t d2s8, d3s8;
-
- q11u8 = vabdq_u8(q3, q4);
- q12u8 = vabdq_u8(q4, q5);
- q13u8 = vabdq_u8(q5, q6);
- q14u8 = vabdq_u8(q8, q7);
- q3 = vabdq_u8(q9, q8);
- q4 = vabdq_u8(q10, q9);
-
- q11u8 = vmaxq_u8(q11u8, q12u8);
- q12u8 = vmaxq_u8(q13u8, q14u8);
- q3 = vmaxq_u8(q3, q4);
- q15u8 = vmaxq_u8(q11u8, q12u8);
-
- q9 = vabdq_u8(q6, q7);
-
- // vp8_hevmask
- q13u8 = vcgtq_u8(q13u8, qthresh);
- q14u8 = vcgtq_u8(q14u8, qthresh);
- q15u8 = vmaxq_u8(q15u8, q3);
-
- q2u8 = vabdq_u8(q5, q8);
- q9 = vqaddq_u8(q9, q9);
-
- q15u8 = vcgeq_u8(qlimit, q15u8);
-
- // vp8_filter() function
- // convert to signed
- q10 = vdupq_n_u8(0x80);
- q8 = veorq_u8(q8, q10);
- q7 = veorq_u8(q7, q10);
- q6 = veorq_u8(q6, q10);
- q5 = veorq_u8(q5, q10);
-
- q2u8 = vshrq_n_u8(q2u8, 1);
- q9 = vqaddq_u8(q9, q2u8);
-
- q10 = vdupq_n_u8(3);
-
- q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
- vget_low_s8(vreinterpretq_s8_u8(q6)));
- q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
- vget_high_s8(vreinterpretq_s8_u8(q6)));
-
- q9 = vcgeq_u8(qblimit, q9);
-
- q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
- vreinterpretq_s8_u8(q8));
-
- q14u8 = vorrq_u8(q13u8, q14u8);
-
- q4u16 = vmovl_u8(vget_low_u8(q10));
- q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
- q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
-
- q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
- q15u8 = vandq_u8(q15u8, q9);
-
- q1s8 = vreinterpretq_s8_u8(q1u8);
- q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
- q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
-
- q9 = vdupq_n_u8(4);
- // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
- d2s8 = vqmovn_s16(q2s16);
- d3s8 = vqmovn_s16(q11s16);
- q1s8 = vcombine_s8(d2s8, d3s8);
- q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
- q1s8 = vreinterpretq_s8_u8(q1u8);
-
- q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
- q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
- q2s8 = vshrq_n_s8(q2s8, 3);
- q1s8 = vshrq_n_s8(q1s8, 3);
-
- q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
- q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
-
- q1s8 = vrshrq_n_s8(q1s8, 1);
- q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
-
- q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
- q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
-
- q0u8 = vdupq_n_u8(0x80);
- *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
- *q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
- *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
- *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
- return;
-}
-
-void vp8_loop_filter_horizontal_edge_y_neon(
- unsigned char *src,
- int pitch,
- unsigned char blimit,
- unsigned char limit,
- unsigned char thresh) {
- uint8x16_t qblimit, qlimit, qthresh, q3, q4;
- uint8x16_t q5, q6, q7, q8, q9, q10;
-
- qblimit = vdupq_n_u8(blimit);
- qlimit = vdupq_n_u8(limit);
- qthresh = vdupq_n_u8(thresh);
- src -= (pitch << 2);
-
- q3 = vld1q_u8(src);
- src += pitch;
- q4 = vld1q_u8(src);
- src += pitch;
- q5 = vld1q_u8(src);
- src += pitch;
- q6 = vld1q_u8(src);
- src += pitch;
- q7 = vld1q_u8(src);
- src += pitch;
- q8 = vld1q_u8(src);
- src += pitch;
- q9 = vld1q_u8(src);
- src += pitch;
- q10 = vld1q_u8(src);
-
- vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
- q5, q6, q7, q8, q9, q10,
- &q5, &q6, &q7, &q8);
-
- src -= (pitch * 5);
- vst1q_u8(src, q5);
- src += pitch;
- vst1q_u8(src, q6);
- src += pitch;
- vst1q_u8(src, q7);
- src += pitch;
- vst1q_u8(src, q8);
- return;
-}
-
-void vp8_loop_filter_horizontal_edge_uv_neon(
- unsigned char *u,
- int pitch,
- unsigned char blimit,
- unsigned char limit,
- unsigned char thresh,
- unsigned char *v) {
- uint8x16_t qblimit, qlimit, qthresh, q3, q4;
- uint8x16_t q5, q6, q7, q8, q9, q10;
- uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
- uint8x8_t d15, d16, d17, d18, d19, d20, d21;
-
- qblimit = vdupq_n_u8(blimit);
- qlimit = vdupq_n_u8(limit);
- qthresh = vdupq_n_u8(thresh);
-
- u -= (pitch << 2);
- v -= (pitch << 2);
-
- d6 = vld1_u8(u);
- u += pitch;
- d7 = vld1_u8(v);
- v += pitch;
- d8 = vld1_u8(u);
- u += pitch;
- d9 = vld1_u8(v);
- v += pitch;
- d10 = vld1_u8(u);
- u += pitch;
- d11 = vld1_u8(v);
- v += pitch;
- d12 = vld1_u8(u);
- u += pitch;
- d13 = vld1_u8(v);
- v += pitch;
- d14 = vld1_u8(u);
- u += pitch;
- d15 = vld1_u8(v);
- v += pitch;
- d16 = vld1_u8(u);
- u += pitch;
- d17 = vld1_u8(v);
- v += pitch;
- d18 = vld1_u8(u);
- u += pitch;
- d19 = vld1_u8(v);
- v += pitch;
- d20 = vld1_u8(u);
- d21 = vld1_u8(v);
-
- q3 = vcombine_u8(d6, d7);
- q4 = vcombine_u8(d8, d9);
- q5 = vcombine_u8(d10, d11);
- q6 = vcombine_u8(d12, d13);
- q7 = vcombine_u8(d14, d15);
- q8 = vcombine_u8(d16, d17);
- q9 = vcombine_u8(d18, d19);
- q10 = vcombine_u8(d20, d21);
-
- vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
- q5, q6, q7, q8, q9, q10,
- &q5, &q6, &q7, &q8);
-
- u -= (pitch * 5);
- vst1_u8(u, vget_low_u8(q5));
- u += pitch;
- vst1_u8(u, vget_low_u8(q6));
- u += pitch;
- vst1_u8(u, vget_low_u8(q7));
- u += pitch;
- vst1_u8(u, vget_low_u8(q8));
-
- v -= (pitch * 5);
- vst1_u8(v, vget_high_u8(q5));
- v += pitch;
- vst1_u8(v, vget_high_u8(q6));
- v += pitch;
- vst1_u8(v, vget_high_u8(q7));
- v += pitch;
- vst1_u8(v, vget_high_u8(q8));
- return;
-}
-
-static INLINE void write_4x8(unsigned char *dst, int pitch,
- const uint8x8x4_t result) {
-#ifdef VPX_INCOMPATIBLE_GCC
- /*
- * uint8x8x4_t result
- 00 01 02 03 | 04 05 06 07
- 10 11 12 13 | 14 15 16 17
- 20 21 22 23 | 24 25 26 27
- 30 31 32 33 | 34 35 36 37
- ---
- * after vtrn_u16
- 00 01 20 21 | 04 05 24 25
- 02 03 22 23 | 06 07 26 27
- 10 11 30 31 | 14 15 34 35
- 12 13 32 33 | 16 17 36 37
- ---
- * after vtrn_u8
- 00 10 20 30 | 04 14 24 34
- 01 11 21 31 | 05 15 25 35
- 02 12 22 32 | 06 16 26 36
- 03 13 23 33 | 07 17 27 37
- */
- const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
- vreinterpret_u16_u8(result.val[2]));
- const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
- vreinterpret_u16_u8(result.val[3]));
- const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
- vreinterpret_u8_u16(r13_u16.val[0]));
- const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
- vreinterpret_u8_u16(r13_u16.val[1]));
- const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
- const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
- const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
- const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
- vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
- dst += pitch;
- vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
-#else
- vst4_lane_u8(dst, result, 0);
- dst += pitch;
- vst4_lane_u8(dst, result, 1);
- dst += pitch;
- vst4_lane_u8(dst, result, 2);
- dst += pitch;
- vst4_lane_u8(dst, result, 3);
- dst += pitch;
- vst4_lane_u8(dst, result, 4);
- dst += pitch;
- vst4_lane_u8(dst, result, 5);
- dst += pitch;
- vst4_lane_u8(dst, result, 6);
- dst += pitch;
- vst4_lane_u8(dst, result, 7);
-#endif // VPX_INCOMPATIBLE_GCC
-}
-
-void vp8_loop_filter_vertical_edge_y_neon(
- unsigned char *src,
- int pitch,
- unsigned char blimit,
- unsigned char limit,
- unsigned char thresh) {
- unsigned char *s, *d;
- uint8x16_t qblimit, qlimit, qthresh, q3, q4;
- uint8x16_t q5, q6, q7, q8, q9, q10;
- uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
- uint8x8_t d15, d16, d17, d18, d19, d20, d21;
- uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
- uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
- uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
- uint8x8x4_t q4ResultH, q4ResultL;
-
- qblimit = vdupq_n_u8(blimit);
- qlimit = vdupq_n_u8(limit);
- qthresh = vdupq_n_u8(thresh);
-
- s = src - 4;
- d6 = vld1_u8(s);
- s += pitch;
- d8 = vld1_u8(s);
- s += pitch;
- d10 = vld1_u8(s);
- s += pitch;
- d12 = vld1_u8(s);
- s += pitch;
- d14 = vld1_u8(s);
- s += pitch;
- d16 = vld1_u8(s);
- s += pitch;
- d18 = vld1_u8(s);
- s += pitch;
- d20 = vld1_u8(s);
- s += pitch;
- d7 = vld1_u8(s);
- s += pitch;
- d9 = vld1_u8(s);
- s += pitch;
- d11 = vld1_u8(s);
- s += pitch;
- d13 = vld1_u8(s);
- s += pitch;
- d15 = vld1_u8(s);
- s += pitch;
- d17 = vld1_u8(s);
- s += pitch;
- d19 = vld1_u8(s);
- s += pitch;
- d21 = vld1_u8(s);
-
- q3 = vcombine_u8(d6, d7);
- q4 = vcombine_u8(d8, d9);
- q5 = vcombine_u8(d10, d11);
- q6 = vcombine_u8(d12, d13);
- q7 = vcombine_u8(d14, d15);
- q8 = vcombine_u8(d16, d17);
- q9 = vcombine_u8(d18, d19);
- q10 = vcombine_u8(d20, d21);
-
- q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
- q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
- q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
- q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
-
- q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
- vreinterpretq_u16_u32(q2tmp2.val[0]));
- q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
- vreinterpretq_u16_u32(q2tmp3.val[0]));
- q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
- vreinterpretq_u16_u32(q2tmp2.val[1]));
- q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
- vreinterpretq_u16_u32(q2tmp3.val[1]));
-
- q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
- vreinterpretq_u8_u16(q2tmp5.val[0]));
- q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
- vreinterpretq_u8_u16(q2tmp5.val[1]));
- q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
- vreinterpretq_u8_u16(q2tmp7.val[0]));
- q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
- vreinterpretq_u8_u16(q2tmp7.val[1]));
-
- q3 = q2tmp8.val[0];
- q4 = q2tmp8.val[1];
- q5 = q2tmp9.val[0];
- q6 = q2tmp9.val[1];
- q7 = q2tmp10.val[0];
- q8 = q2tmp10.val[1];
- q9 = q2tmp11.val[0];
- q10 = q2tmp11.val[1];
-
- vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
- q5, q6, q7, q8, q9, q10,
- &q5, &q6, &q7, &q8);
-
- q4ResultL.val[0] = vget_low_u8(q5); // d10
- q4ResultL.val[1] = vget_low_u8(q6); // d12
- q4ResultL.val[2] = vget_low_u8(q7); // d14
- q4ResultL.val[3] = vget_low_u8(q8); // d16
- q4ResultH.val[0] = vget_high_u8(q5); // d11
- q4ResultH.val[1] = vget_high_u8(q6); // d13
- q4ResultH.val[2] = vget_high_u8(q7); // d15
- q4ResultH.val[3] = vget_high_u8(q8); // d17
-
- d = src - 2;
- write_4x8(d, pitch, q4ResultL);
- d += pitch * 8;
- write_4x8(d, pitch, q4ResultH);
-}
-
-void vp8_loop_filter_vertical_edge_uv_neon(
- unsigned char *u,
- int pitch,
- unsigned char blimit,
- unsigned char limit,
- unsigned char thresh,
- unsigned char *v) {
- unsigned char *us, *ud;
- unsigned char *vs, *vd;
- uint8x16_t qblimit, qlimit, qthresh, q3, q4;
- uint8x16_t q5, q6, q7, q8, q9, q10;
- uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
- uint8x8_t d15, d16, d17, d18, d19, d20, d21;
- uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
- uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
- uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
- uint8x8x4_t q4ResultH, q4ResultL;
-
- qblimit = vdupq_n_u8(blimit);
- qlimit = vdupq_n_u8(limit);
- qthresh = vdupq_n_u8(thresh);
-
- us = u - 4;
- d6 = vld1_u8(us);
- us += pitch;
- d8 = vld1_u8(us);
- us += pitch;
- d10 = vld1_u8(us);
- us += pitch;
- d12 = vld1_u8(us);
- us += pitch;
- d14 = vld1_u8(us);
- us += pitch;
- d16 = vld1_u8(us);
- us += pitch;
- d18 = vld1_u8(us);
- us += pitch;
- d20 = vld1_u8(us);
-
- vs = v - 4;
- d7 = vld1_u8(vs);
- vs += pitch;
- d9 = vld1_u8(vs);
- vs += pitch;
- d11 = vld1_u8(vs);
- vs += pitch;
- d13 = vld1_u8(vs);
- vs += pitch;
- d15 = vld1_u8(vs);
- vs += pitch;
- d17 = vld1_u8(vs);
- vs += pitch;
- d19 = vld1_u8(vs);
- vs += pitch;
- d21 = vld1_u8(vs);
-
- q3 = vcombine_u8(d6, d7);
- q4 = vcombine_u8(d8, d9);
- q5 = vcombine_u8(d10, d11);
- q6 = vcombine_u8(d12, d13);
- q7 = vcombine_u8(d14, d15);
- q8 = vcombine_u8(d16, d17);
- q9 = vcombine_u8(d18, d19);
- q10 = vcombine_u8(d20, d21);
-
- q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
- q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
- q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
- q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
-
- q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
- vreinterpretq_u16_u32(q2tmp2.val[0]));
- q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
- vreinterpretq_u16_u32(q2tmp3.val[0]));
- q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
- vreinterpretq_u16_u32(q2tmp2.val[1]));
- q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
- vreinterpretq_u16_u32(q2tmp3.val[1]));
-
- q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
- vreinterpretq_u8_u16(q2tmp5.val[0]));
- q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
- vreinterpretq_u8_u16(q2tmp5.val[1]));
- q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
- vreinterpretq_u8_u16(q2tmp7.val[0]));
- q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
- vreinterpretq_u8_u16(q2tmp7.val[1]));
-
- q3 = q2tmp8.val[0];
- q4 = q2tmp8.val[1];
- q5 = q2tmp9.val[0];
- q6 = q2tmp9.val[1];
- q7 = q2tmp10.val[0];
- q8 = q2tmp10.val[1];
- q9 = q2tmp11.val[0];
- q10 = q2tmp11.val[1];
-
- vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
- q5, q6, q7, q8, q9, q10,
- &q5, &q6, &q7, &q8);
-
- q4ResultL.val[0] = vget_low_u8(q5); // d10
- q4ResultL.val[1] = vget_low_u8(q6); // d12
- q4ResultL.val[2] = vget_low_u8(q7); // d14
- q4ResultL.val[3] = vget_low_u8(q8); // d16
- ud = u - 2;
- write_4x8(ud, pitch, q4ResultL);
-
- q4ResultH.val[0] = vget_high_u8(q5); // d11
- q4ResultH.val[1] = vget_high_u8(q6); // d13
- q4ResultH.val[2] = vget_high_u8(q7); // d15
- q4ResultH.val[3] = vget_high_u8(q8); // d17
- vd = v - 2;
- write_4x8(vd, pitch, q4ResultH);
-}
--- /dev/null
+++ b/vp8/common/arm/neon/vp8_loopfilter_neon.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+#include "vpx_ports/arm.h"
+
+static INLINE void vp8_loop_filter_neon(
+ uint8x16_t qblimit, // flimit
+ uint8x16_t qlimit, // limit
+ uint8x16_t qthresh, // thresh
+ uint8x16_t q3, // p3
+ uint8x16_t q4, // p2
+ uint8x16_t q5, // p1
+ uint8x16_t q6, // p0
+ uint8x16_t q7, // q0
+ uint8x16_t q8, // q1
+ uint8x16_t q9, // q2
+ uint8x16_t q10, // q3
+ uint8x16_t *q5r, // p1
+ uint8x16_t *q6r, // p0
+ uint8x16_t *q7r, // q0
+ uint8x16_t *q8r) { // q1
+ uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
+ int16x8_t q2s16, q11s16;
+ uint16x8_t q4u16;
+ int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
+ int8x8_t d2s8, d3s8;
+
+ q11u8 = vabdq_u8(q3, q4);
+ q12u8 = vabdq_u8(q4, q5);
+ q13u8 = vabdq_u8(q5, q6);
+ q14u8 = vabdq_u8(q8, q7);
+ q3 = vabdq_u8(q9, q8);
+ q4 = vabdq_u8(q10, q9);
+
+ q11u8 = vmaxq_u8(q11u8, q12u8);
+ q12u8 = vmaxq_u8(q13u8, q14u8);
+ q3 = vmaxq_u8(q3, q4);
+ q15u8 = vmaxq_u8(q11u8, q12u8);
+
+ q9 = vabdq_u8(q6, q7);
+
+ // vp8_hevmask
+ q13u8 = vcgtq_u8(q13u8, qthresh);
+ q14u8 = vcgtq_u8(q14u8, qthresh);
+ q15u8 = vmaxq_u8(q15u8, q3);
+
+ q2u8 = vabdq_u8(q5, q8);
+ q9 = vqaddq_u8(q9, q9);
+
+ q15u8 = vcgeq_u8(qlimit, q15u8);
+
+ // vp8_filter() function
+ // convert to signed
+ q10 = vdupq_n_u8(0x80);
+ q8 = veorq_u8(q8, q10);
+ q7 = veorq_u8(q7, q10);
+ q6 = veorq_u8(q6, q10);
+ q5 = veorq_u8(q5, q10);
+
+ q2u8 = vshrq_n_u8(q2u8, 1);
+ q9 = vqaddq_u8(q9, q2u8);
+
+ q10 = vdupq_n_u8(3);
+
+ q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
+ vget_low_s8(vreinterpretq_s8_u8(q6)));
+ q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
+ vget_high_s8(vreinterpretq_s8_u8(q6)));
+
+ q9 = vcgeq_u8(qblimit, q9);
+
+ q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
+ vreinterpretq_s8_u8(q8));
+
+ q14u8 = vorrq_u8(q13u8, q14u8);
+
+ q4u16 = vmovl_u8(vget_low_u8(q10));
+ q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
+ q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
+
+ q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
+ q15u8 = vandq_u8(q15u8, q9);
+
+ q1s8 = vreinterpretq_s8_u8(q1u8);
+ q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
+ q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
+
+ q9 = vdupq_n_u8(4);
+ // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+ d2s8 = vqmovn_s16(q2s16);
+ d3s8 = vqmovn_s16(q11s16);
+ q1s8 = vcombine_s8(d2s8, d3s8);
+ q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
+ q1s8 = vreinterpretq_s8_u8(q1u8);
+
+ q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
+ q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
+ q2s8 = vshrq_n_s8(q2s8, 3);
+ q1s8 = vshrq_n_s8(q1s8, 3);
+
+ q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
+ q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
+
+ q1s8 = vrshrq_n_s8(q1s8, 1);
+ q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
+
+ q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
+ q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
+
+ q0u8 = vdupq_n_u8(0x80);
+ *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
+ *q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
+ *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
+ *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
+ return;
+}
+
+void vp8_loop_filter_horizontal_edge_y_neon(
+ unsigned char *src,
+ int pitch,
+ unsigned char blimit,
+ unsigned char limit,
+ unsigned char thresh) {
+ uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+ uint8x16_t q5, q6, q7, q8, q9, q10;
+
+ qblimit = vdupq_n_u8(blimit);
+ qlimit = vdupq_n_u8(limit);
+ qthresh = vdupq_n_u8(thresh);
+ src -= (pitch << 2);
+
+ q3 = vld1q_u8(src);
+ src += pitch;
+ q4 = vld1q_u8(src);
+ src += pitch;
+ q5 = vld1q_u8(src);
+ src += pitch;
+ q6 = vld1q_u8(src);
+ src += pitch;
+ q7 = vld1q_u8(src);
+ src += pitch;
+ q8 = vld1q_u8(src);
+ src += pitch;
+ q9 = vld1q_u8(src);
+ src += pitch;
+ q10 = vld1q_u8(src);
+
+ vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+ q5, q6, q7, q8, q9, q10,
+ &q5, &q6, &q7, &q8);
+
+ src -= (pitch * 5);
+ vst1q_u8(src, q5);
+ src += pitch;
+ vst1q_u8(src, q6);
+ src += pitch;
+ vst1q_u8(src, q7);
+ src += pitch;
+ vst1q_u8(src, q8);
+ return;
+}
+
+void vp8_loop_filter_horizontal_edge_uv_neon(
+ unsigned char *u,
+ int pitch,
+ unsigned char blimit,
+ unsigned char limit,
+ unsigned char thresh,
+ unsigned char *v) {
+ uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+ uint8x16_t q5, q6, q7, q8, q9, q10;
+ uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+ uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+
+ qblimit = vdupq_n_u8(blimit);
+ qlimit = vdupq_n_u8(limit);
+ qthresh = vdupq_n_u8(thresh);
+
+ u -= (pitch << 2);
+ v -= (pitch << 2);
+
+ d6 = vld1_u8(u);
+ u += pitch;
+ d7 = vld1_u8(v);
+ v += pitch;
+ d8 = vld1_u8(u);
+ u += pitch;
+ d9 = vld1_u8(v);
+ v += pitch;
+ d10 = vld1_u8(u);
+ u += pitch;
+ d11 = vld1_u8(v);
+ v += pitch;
+ d12 = vld1_u8(u);
+ u += pitch;
+ d13 = vld1_u8(v);
+ v += pitch;
+ d14 = vld1_u8(u);
+ u += pitch;
+ d15 = vld1_u8(v);
+ v += pitch;
+ d16 = vld1_u8(u);
+ u += pitch;
+ d17 = vld1_u8(v);
+ v += pitch;
+ d18 = vld1_u8(u);
+ u += pitch;
+ d19 = vld1_u8(v);
+ v += pitch;
+ d20 = vld1_u8(u);
+ d21 = vld1_u8(v);
+
+ q3 = vcombine_u8(d6, d7);
+ q4 = vcombine_u8(d8, d9);
+ q5 = vcombine_u8(d10, d11);
+ q6 = vcombine_u8(d12, d13);
+ q7 = vcombine_u8(d14, d15);
+ q8 = vcombine_u8(d16, d17);
+ q9 = vcombine_u8(d18, d19);
+ q10 = vcombine_u8(d20, d21);
+
+ vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+ q5, q6, q7, q8, q9, q10,
+ &q5, &q6, &q7, &q8);
+
+ u -= (pitch * 5);
+ vst1_u8(u, vget_low_u8(q5));
+ u += pitch;
+ vst1_u8(u, vget_low_u8(q6));
+ u += pitch;
+ vst1_u8(u, vget_low_u8(q7));
+ u += pitch;
+ vst1_u8(u, vget_low_u8(q8));
+
+ v -= (pitch * 5);
+ vst1_u8(v, vget_high_u8(q5));
+ v += pitch;
+ vst1_u8(v, vget_high_u8(q6));
+ v += pitch;
+ vst1_u8(v, vget_high_u8(q7));
+ v += pitch;
+ vst1_u8(v, vget_high_u8(q8));
+ return;
+}
+
+static INLINE void write_4x8(unsigned char *dst, int pitch,
+ const uint8x8x4_t result) {
+#ifdef VPX_INCOMPATIBLE_GCC
+ /*
+ * uint8x8x4_t result
+ 00 01 02 03 | 04 05 06 07
+ 10 11 12 13 | 14 15 16 17
+ 20 21 22 23 | 24 25 26 27
+ 30 31 32 33 | 34 35 36 37
+ ---
+ * after vtrn_u16
+ 00 01 20 21 | 04 05 24 25
+ 02 03 22 23 | 06 07 26 27
+ 10 11 30 31 | 14 15 34 35
+ 12 13 32 33 | 16 17 36 37
+ ---
+ * after vtrn_u8
+ 00 10 20 30 | 04 14 24 34
+ 01 11 21 31 | 05 15 25 35
+ 02 12 22 32 | 06 16 26 36
+ 03 13 23 33 | 07 17 27 37
+ */
+ const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
+ vreinterpret_u16_u8(result.val[2]));
+ const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
+ vreinterpret_u16_u8(result.val[3]));
+ const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
+ vreinterpret_u8_u16(r13_u16.val[0]));
+ const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
+ vreinterpret_u8_u16(r13_u16.val[1]));
+ const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
+ const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
+ const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
+ const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
+ vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
+#else
+ vst4_lane_u8(dst, result, 0);
+ dst += pitch;
+ vst4_lane_u8(dst, result, 1);
+ dst += pitch;
+ vst4_lane_u8(dst, result, 2);
+ dst += pitch;
+ vst4_lane_u8(dst, result, 3);
+ dst += pitch;
+ vst4_lane_u8(dst, result, 4);
+ dst += pitch;
+ vst4_lane_u8(dst, result, 5);
+ dst += pitch;
+ vst4_lane_u8(dst, result, 6);
+ dst += pitch;
+ vst4_lane_u8(dst, result, 7);
+#endif // VPX_INCOMPATIBLE_GCC
+}
+
+void vp8_loop_filter_vertical_edge_y_neon(
+ unsigned char *src,
+ int pitch,
+ unsigned char blimit,
+ unsigned char limit,
+ unsigned char thresh) {
+ unsigned char *s, *d;
+ uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+ uint8x16_t q5, q6, q7, q8, q9, q10;
+ uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+ uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+ uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
+ uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
+ uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
+ uint8x8x4_t q4ResultH, q4ResultL;
+
+ qblimit = vdupq_n_u8(blimit);
+ qlimit = vdupq_n_u8(limit);
+ qthresh = vdupq_n_u8(thresh);
+
+ s = src - 4;
+ d6 = vld1_u8(s);
+ s += pitch;
+ d8 = vld1_u8(s);
+ s += pitch;
+ d10 = vld1_u8(s);
+ s += pitch;
+ d12 = vld1_u8(s);
+ s += pitch;
+ d14 = vld1_u8(s);
+ s += pitch;
+ d16 = vld1_u8(s);
+ s += pitch;
+ d18 = vld1_u8(s);
+ s += pitch;
+ d20 = vld1_u8(s);
+ s += pitch;
+ d7 = vld1_u8(s);
+ s += pitch;
+ d9 = vld1_u8(s);
+ s += pitch;
+ d11 = vld1_u8(s);
+ s += pitch;
+ d13 = vld1_u8(s);
+ s += pitch;
+ d15 = vld1_u8(s);
+ s += pitch;
+ d17 = vld1_u8(s);
+ s += pitch;
+ d19 = vld1_u8(s);
+ s += pitch;
+ d21 = vld1_u8(s);
+
+ q3 = vcombine_u8(d6, d7);
+ q4 = vcombine_u8(d8, d9);
+ q5 = vcombine_u8(d10, d11);
+ q6 = vcombine_u8(d12, d13);
+ q7 = vcombine_u8(d14, d15);
+ q8 = vcombine_u8(d16, d17);
+ q9 = vcombine_u8(d18, d19);
+ q10 = vcombine_u8(d20, d21);
+
+ q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+ q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+ q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+ q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+ q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+ vreinterpretq_u16_u32(q2tmp2.val[0]));
+ q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+ vreinterpretq_u16_u32(q2tmp3.val[0]));
+ q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+ vreinterpretq_u16_u32(q2tmp2.val[1]));
+ q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+ vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+ q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+ vreinterpretq_u8_u16(q2tmp5.val[0]));
+ q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+ vreinterpretq_u8_u16(q2tmp5.val[1]));
+ q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+ vreinterpretq_u8_u16(q2tmp7.val[0]));
+ q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+ vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+ q3 = q2tmp8.val[0];
+ q4 = q2tmp8.val[1];
+ q5 = q2tmp9.val[0];
+ q6 = q2tmp9.val[1];
+ q7 = q2tmp10.val[0];
+ q8 = q2tmp10.val[1];
+ q9 = q2tmp11.val[0];
+ q10 = q2tmp11.val[1];
+
+ vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+ q5, q6, q7, q8, q9, q10,
+ &q5, &q6, &q7, &q8);
+
+ q4ResultL.val[0] = vget_low_u8(q5); // d10
+ q4ResultL.val[1] = vget_low_u8(q6); // d12
+ q4ResultL.val[2] = vget_low_u8(q7); // d14
+ q4ResultL.val[3] = vget_low_u8(q8); // d16
+ q4ResultH.val[0] = vget_high_u8(q5); // d11
+ q4ResultH.val[1] = vget_high_u8(q6); // d13
+ q4ResultH.val[2] = vget_high_u8(q7); // d15
+ q4ResultH.val[3] = vget_high_u8(q8); // d17
+
+ d = src - 2;
+ write_4x8(d, pitch, q4ResultL);
+ d += pitch * 8;
+ write_4x8(d, pitch, q4ResultH);
+}
+
+void vp8_loop_filter_vertical_edge_uv_neon(
+ unsigned char *u,
+ int pitch,
+ unsigned char blimit,
+ unsigned char limit,
+ unsigned char thresh,
+ unsigned char *v) {
+ unsigned char *us, *ud;
+ unsigned char *vs, *vd;
+ uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+ uint8x16_t q5, q6, q7, q8, q9, q10;
+ uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+ uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+ uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
+ uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
+ uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
+ uint8x8x4_t q4ResultH, q4ResultL;
+
+ qblimit = vdupq_n_u8(blimit);
+ qlimit = vdupq_n_u8(limit);
+ qthresh = vdupq_n_u8(thresh);
+
+ us = u - 4;
+ d6 = vld1_u8(us);
+ us += pitch;
+ d8 = vld1_u8(us);
+ us += pitch;
+ d10 = vld1_u8(us);
+ us += pitch;
+ d12 = vld1_u8(us);
+ us += pitch;
+ d14 = vld1_u8(us);
+ us += pitch;
+ d16 = vld1_u8(us);
+ us += pitch;
+ d18 = vld1_u8(us);
+ us += pitch;
+ d20 = vld1_u8(us);
+
+ vs = v - 4;
+ d7 = vld1_u8(vs);
+ vs += pitch;
+ d9 = vld1_u8(vs);
+ vs += pitch;
+ d11 = vld1_u8(vs);
+ vs += pitch;
+ d13 = vld1_u8(vs);
+ vs += pitch;
+ d15 = vld1_u8(vs);
+ vs += pitch;
+ d17 = vld1_u8(vs);
+ vs += pitch;
+ d19 = vld1_u8(vs);
+ vs += pitch;
+ d21 = vld1_u8(vs);
+
+ q3 = vcombine_u8(d6, d7);
+ q4 = vcombine_u8(d8, d9);
+ q5 = vcombine_u8(d10, d11);
+ q6 = vcombine_u8(d12, d13);
+ q7 = vcombine_u8(d14, d15);
+ q8 = vcombine_u8(d16, d17);
+ q9 = vcombine_u8(d18, d19);
+ q10 = vcombine_u8(d20, d21);
+
+ q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+ q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+ q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+ q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+ q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+ vreinterpretq_u16_u32(q2tmp2.val[0]));
+ q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+ vreinterpretq_u16_u32(q2tmp3.val[0]));
+ q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+ vreinterpretq_u16_u32(q2tmp2.val[1]));
+ q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+ vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+ q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+ vreinterpretq_u8_u16(q2tmp5.val[0]));
+ q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+ vreinterpretq_u8_u16(q2tmp5.val[1]));
+ q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+ vreinterpretq_u8_u16(q2tmp7.val[0]));
+ q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+ vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+ q3 = q2tmp8.val[0];
+ q4 = q2tmp8.val[1];
+ q5 = q2tmp9.val[0];
+ q6 = q2tmp9.val[1];
+ q7 = q2tmp10.val[0];
+ q8 = q2tmp10.val[1];
+ q9 = q2tmp11.val[0];
+ q10 = q2tmp11.val[1];
+
+ vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+ q5, q6, q7, q8, q9, q10,
+ &q5, &q6, &q7, &q8);
+
+ q4ResultL.val[0] = vget_low_u8(q5); // d10
+ q4ResultL.val[1] = vget_low_u8(q6); // d12
+ q4ResultL.val[2] = vget_low_u8(q7); // d14
+ q4ResultL.val[3] = vget_low_u8(q8); // d16
+ ud = u - 2;
+ write_4x8(ud, pitch, q4ResultL);
+
+ q4ResultH.val[0] = vget_high_u8(q5); // d11
+ q4ResultH.val[1] = vget_high_u8(q6); // d13
+ q4ResultH.val[2] = vget_high_u8(q7); // d15
+ q4ResultH.val[3] = vget_high_u8(q8); // d17
+ vd = v - 2;
+ write_4x8(vd, pitch, q4ResultH);
+}
--- a/vp8/common/loopfilter.c
+++ /dev/null
@@ -1,661 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_config.h"
-#include "vp8_rtcd.h"
-#include "loopfilter.h"
-#include "onyxc_int.h"
-#include "vpx_mem/vpx_mem.h"
-
-
-static void lf_init_lut(loop_filter_info_n *lfi)
-{
- int filt_lvl;
-
- for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++)
- {
- if (filt_lvl >= 40)
- {
- lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
- lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
- }
- else if (filt_lvl >= 20)
- {
- lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
- lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
- }
- else if (filt_lvl >= 15)
- {
- lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
- lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
- }
- else
- {
- lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
- lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
- }
- }
-
- lfi->mode_lf_lut[DC_PRED] = 1;
- lfi->mode_lf_lut[V_PRED] = 1;
- lfi->mode_lf_lut[H_PRED] = 1;
- lfi->mode_lf_lut[TM_PRED] = 1;
- lfi->mode_lf_lut[B_PRED] = 0;
-
- lfi->mode_lf_lut[ZEROMV] = 1;
- lfi->mode_lf_lut[NEARESTMV] = 2;
- lfi->mode_lf_lut[NEARMV] = 2;
- lfi->mode_lf_lut[NEWMV] = 2;
- lfi->mode_lf_lut[SPLITMV] = 3;
-
-}
-
-void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
- int sharpness_lvl)
-{
- int i;
-
- /* For each possible value for the loop filter fill out limits */
- for (i = 0; i <= MAX_LOOP_FILTER; i++)
- {
- int filt_lvl = i;
- int block_inside_limit = 0;
-
-        /* Set loop filter parameters that control sharpness. */
- block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
- block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);
-
- if (sharpness_lvl > 0)
- {
- if (block_inside_limit > (9 - sharpness_lvl))
- block_inside_limit = (9 - sharpness_lvl);
- }
-
- if (block_inside_limit < 1)
- block_inside_limit = 1;
-
- memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
- memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH);
- memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
- SIMD_WIDTH);
- }
-}
-
-void vp8_loop_filter_init(VP8_COMMON *cm)
-{
- loop_filter_info_n *lfi = &cm->lf_info;
- int i;
-
- /* init limits for given sharpness*/
- vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
- cm->last_sharpness_level = cm->sharpness_level;
-
- /* init LUT for lvl and hev thr picking */
- lf_init_lut(lfi);
-
- /* init hev threshold const vectors */
- for(i = 0; i < 4 ; i++)
- {
- memset(lfi->hev_thr[i], i, SIMD_WIDTH);
- }
-}
-
-void vp8_loop_filter_frame_init(VP8_COMMON *cm,
- MACROBLOCKD *mbd,
- int default_filt_lvl)
-{
- int seg, /* segment number */
- ref, /* index in ref_lf_deltas */
- mode; /* index in mode_lf_deltas */
-
- loop_filter_info_n *lfi = &cm->lf_info;
-
- /* update limits if sharpness has changed */
- if(cm->last_sharpness_level != cm->sharpness_level)
- {
- vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
- cm->last_sharpness_level = cm->sharpness_level;
- }
-
- for(seg = 0; seg < MAX_MB_SEGMENTS; seg++)
- {
- int lvl_seg = default_filt_lvl;
- int lvl_ref, lvl_mode;
-
- /* Note the baseline filter values for each segment */
- if (mbd->segmentation_enabled)
- {
- /* Abs value */
- if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
- {
- lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
- }
- else /* Delta Value */
- {
- lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
- lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
- }
- }
-
- if (!mbd->mode_ref_lf_delta_enabled)
- {
- /* we could get rid of this if we assume that deltas are set to
- * zero when not in use; encoder always uses deltas
- */
- memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
- continue;
- }
-
- /* INTRA_FRAME */
- ref = INTRA_FRAME;
-
- /* Apply delta for reference frame */
- lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref];
-
- /* Apply delta for Intra modes */
- mode = 0; /* B_PRED */
- /* Only the split mode BPRED has a further special case */
- lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
- /* clamp */
- lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0;
-
- lfi->lvl[seg][ref][mode] = lvl_mode;
-
- mode = 1; /* all the rest of Intra modes */
- /* clamp */
- lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0;
- lfi->lvl[seg][ref][mode] = lvl_mode;
-
- /* LAST, GOLDEN, ALT */
- for(ref = 1; ref < MAX_REF_FRAMES; ref++)
- {
- /* Apply delta for reference frame */
- lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref];
-
- /* Apply delta for Inter modes */
- for (mode = 1; mode < 4; mode++)
- {
- lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
- /* clamp */
- lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0;
-
- lfi->lvl[seg][ref][mode] = lvl_mode;
- }
- }
- }
-}
-
-
-void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context,
- int mb_row, int post_ystride, int post_uvstride,
- unsigned char *y_ptr, unsigned char *u_ptr,
- unsigned char *v_ptr)
-{
- int mb_col;
- int filter_level;
- loop_filter_info_n *lfi_n = &cm->lf_info;
- loop_filter_info lfi;
- FRAME_TYPE frame_type = cm->frame_type;
-
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
- {
- int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
- mode_info_context->mbmi.mode != SPLITMV &&
- mode_info_context->mbmi.mb_skip_coeff);
-
- const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
- const int seg = mode_info_context->mbmi.segment_id;
- const int ref_frame = mode_info_context->mbmi.ref_frame;
-
- filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
-
- if (filter_level)
- {
- const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
- lfi.mblim = lfi_n->mblim[filter_level];
- lfi.blim = lfi_n->blim[filter_level];
- lfi.lim = lfi_n->lim[filter_level];
- lfi.hev_thr = lfi_n->hev_thr[hev_index];
-
- if (mb_col > 0)
- vp8_loop_filter_mbv
- (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
-
- if (!skip_lf)
- vp8_loop_filter_bv
- (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
-
- /* don't apply across umv border */
- if (mb_row > 0)
- vp8_loop_filter_mbh
- (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
-
- if (!skip_lf)
- vp8_loop_filter_bh
- (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
- }
-
- y_ptr += 16;
- u_ptr += 8;
- v_ptr += 8;
-
- mode_info_context++; /* step to next MB */
- }
-
-}
-
-void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context,
- int mb_row, int post_ystride, int post_uvstride,
- unsigned char *y_ptr, unsigned char *u_ptr,
- unsigned char *v_ptr)
-{
- int mb_col;
- int filter_level;
- loop_filter_info_n *lfi_n = &cm->lf_info;
- (void)post_uvstride;
-
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
- {
- int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
- mode_info_context->mbmi.mode != SPLITMV &&
- mode_info_context->mbmi.mb_skip_coeff);
-
- const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
- const int seg = mode_info_context->mbmi.segment_id;
- const int ref_frame = mode_info_context->mbmi.ref_frame;
-
- filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
-
- if (filter_level)
- {
- if (mb_col > 0)
- vp8_loop_filter_simple_mbv
- (y_ptr, post_ystride, lfi_n->mblim[filter_level]);
-
- if (!skip_lf)
- vp8_loop_filter_simple_bv
- (y_ptr, post_ystride, lfi_n->blim[filter_level]);
-
- /* don't apply across umv border */
- if (mb_row > 0)
- vp8_loop_filter_simple_mbh
- (y_ptr, post_ystride, lfi_n->mblim[filter_level]);
-
- if (!skip_lf)
- vp8_loop_filter_simple_bh
- (y_ptr, post_ystride, lfi_n->blim[filter_level]);
- }
-
- y_ptr += 16;
- u_ptr += 8;
- v_ptr += 8;
-
- mode_info_context++; /* step to next MB */
- }
-
-}
-void vp8_loop_filter_frame(VP8_COMMON *cm,
- MACROBLOCKD *mbd,
- int frame_type)
-{
- YV12_BUFFER_CONFIG *post = cm->frame_to_show;
- loop_filter_info_n *lfi_n = &cm->lf_info;
- loop_filter_info lfi;
-
- int mb_row;
- int mb_col;
- int mb_rows = cm->mb_rows;
- int mb_cols = cm->mb_cols;
-
- int filter_level;
-
- unsigned char *y_ptr, *u_ptr, *v_ptr;
-
- /* Point at base of Mb MODE_INFO list */
- const MODE_INFO *mode_info_context = cm->mi;
- int post_y_stride = post->y_stride;
- int post_uv_stride = post->uv_stride;
-
- /* Initialize the loop filter for this frame. */
- vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
-
- /* Set up the buffer pointers */
- y_ptr = post->y_buffer;
- u_ptr = post->u_buffer;
- v_ptr = post->v_buffer;
-
- /* vp8_filter each macro block */
- if (cm->filter_type == NORMAL_LOOPFILTER)
- {
- for (mb_row = 0; mb_row < mb_rows; mb_row++)
- {
- for (mb_col = 0; mb_col < mb_cols; mb_col++)
- {
- int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
- mode_info_context->mbmi.mode != SPLITMV &&
- mode_info_context->mbmi.mb_skip_coeff);
-
- const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
- const int seg = mode_info_context->mbmi.segment_id;
- const int ref_frame = mode_info_context->mbmi.ref_frame;
-
- filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
-
- if (filter_level)
- {
- const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
- lfi.mblim = lfi_n->mblim[filter_level];
- lfi.blim = lfi_n->blim[filter_level];
- lfi.lim = lfi_n->lim[filter_level];
- lfi.hev_thr = lfi_n->hev_thr[hev_index];
-
- if (mb_col > 0)
- vp8_loop_filter_mbv
- (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
-
- if (!skip_lf)
- vp8_loop_filter_bv
- (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
-
- /* don't apply across umv border */
- if (mb_row > 0)
- vp8_loop_filter_mbh
- (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
-
- if (!skip_lf)
- vp8_loop_filter_bh
- (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
- }
-
- y_ptr += 16;
- u_ptr += 8;
- v_ptr += 8;
-
- mode_info_context++; /* step to next MB */
- }
- y_ptr += post_y_stride * 16 - post->y_width;
- u_ptr += post_uv_stride * 8 - post->uv_width;
- v_ptr += post_uv_stride * 8 - post->uv_width;
-
- mode_info_context++; /* Skip border mb */
-
- }
- }
- else /* SIMPLE_LOOPFILTER */
- {
- for (mb_row = 0; mb_row < mb_rows; mb_row++)
- {
- for (mb_col = 0; mb_col < mb_cols; mb_col++)
- {
- int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
- mode_info_context->mbmi.mode != SPLITMV &&
- mode_info_context->mbmi.mb_skip_coeff);
-
- const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
- const int seg = mode_info_context->mbmi.segment_id;
- const int ref_frame = mode_info_context->mbmi.ref_frame;
-
- filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
- if (filter_level)
- {
- const unsigned char * mblim = lfi_n->mblim[filter_level];
- const unsigned char * blim = lfi_n->blim[filter_level];
-
- if (mb_col > 0)
- vp8_loop_filter_simple_mbv
- (y_ptr, post_y_stride, mblim);
-
- if (!skip_lf)
- vp8_loop_filter_simple_bv
- (y_ptr, post_y_stride, blim);
-
- /* don't apply across umv border */
- if (mb_row > 0)
- vp8_loop_filter_simple_mbh
- (y_ptr, post_y_stride, mblim);
-
- if (!skip_lf)
- vp8_loop_filter_simple_bh
- (y_ptr, post_y_stride, blim);
- }
-
- y_ptr += 16;
- u_ptr += 8;
- v_ptr += 8;
-
- mode_info_context++; /* step to next MB */
- }
- y_ptr += post_y_stride * 16 - post->y_width;
- u_ptr += post_uv_stride * 8 - post->uv_width;
- v_ptr += post_uv_stride * 8 - post->uv_width;
-
- mode_info_context++; /* Skip border mb */
-
- }
- }
-}
-
-void vp8_loop_filter_frame_yonly
-(
- VP8_COMMON *cm,
- MACROBLOCKD *mbd,
- int default_filt_lvl
-)
-{
- YV12_BUFFER_CONFIG *post = cm->frame_to_show;
-
- unsigned char *y_ptr;
- int mb_row;
- int mb_col;
-
- loop_filter_info_n *lfi_n = &cm->lf_info;
- loop_filter_info lfi;
-
- int filter_level;
- FRAME_TYPE frame_type = cm->frame_type;
-
- /* Point at base of Mb MODE_INFO list */
- const MODE_INFO *mode_info_context = cm->mi;
-
-#if 0
- if(default_filt_lvl == 0) /* no filter applied */
- return;
-#endif
-
- /* Initialize the loop filter for this frame. */
- vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl);
-
- /* Set up the buffer pointers */
- y_ptr = post->y_buffer;
-
- /* vp8_filter each macro block */
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
- {
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
- {
- int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
- mode_info_context->mbmi.mode != SPLITMV &&
- mode_info_context->mbmi.mb_skip_coeff);
-
- const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
- const int seg = mode_info_context->mbmi.segment_id;
- const int ref_frame = mode_info_context->mbmi.ref_frame;
-
- filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
-
- if (filter_level)
- {
- if (cm->filter_type == NORMAL_LOOPFILTER)
- {
- const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
- lfi.mblim = lfi_n->mblim[filter_level];
- lfi.blim = lfi_n->blim[filter_level];
- lfi.lim = lfi_n->lim[filter_level];
- lfi.hev_thr = lfi_n->hev_thr[hev_index];
-
- if (mb_col > 0)
- vp8_loop_filter_mbv
- (y_ptr, 0, 0, post->y_stride, 0, &lfi);
-
- if (!skip_lf)
- vp8_loop_filter_bv
- (y_ptr, 0, 0, post->y_stride, 0, &lfi);
-
- /* don't apply across umv border */
- if (mb_row > 0)
- vp8_loop_filter_mbh
- (y_ptr, 0, 0, post->y_stride, 0, &lfi);
-
- if (!skip_lf)
- vp8_loop_filter_bh
- (y_ptr, 0, 0, post->y_stride, 0, &lfi);
- }
- else
- {
- if (mb_col > 0)
- vp8_loop_filter_simple_mbv
- (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
-
- if (!skip_lf)
- vp8_loop_filter_simple_bv
- (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
-
- /* don't apply across umv border */
- if (mb_row > 0)
- vp8_loop_filter_simple_mbh
- (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
-
- if (!skip_lf)
- vp8_loop_filter_simple_bh
- (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
- }
- }
-
- y_ptr += 16;
- mode_info_context ++; /* step to next MB */
-
- }
-
- y_ptr += post->y_stride * 16 - post->y_width;
- mode_info_context ++; /* Skip border mb */
- }
-
-}
-
-void vp8_loop_filter_partial_frame
-(
- VP8_COMMON *cm,
- MACROBLOCKD *mbd,
- int default_filt_lvl
-)
-{
- YV12_BUFFER_CONFIG *post = cm->frame_to_show;
-
- unsigned char *y_ptr;
- int mb_row;
- int mb_col;
- int mb_cols = post->y_width >> 4;
- int mb_rows = post->y_height >> 4;
-
- int linestocopy;
-
- loop_filter_info_n *lfi_n = &cm->lf_info;
- loop_filter_info lfi;
-
- int filter_level;
- FRAME_TYPE frame_type = cm->frame_type;
-
- const MODE_INFO *mode_info_context;
-
-#if 0
- if(default_filt_lvl == 0) /* no filter applied */
- return;
-#endif
-
- /* Initialize the loop filter for this frame. */
- vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl);
-
- /* number of MB rows to use in partial filtering */
- linestocopy = mb_rows / PARTIAL_FRAME_FRACTION;
- linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
-
- /* Set up the buffer pointers; partial image starts at ~middle of frame */
- y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride;
- mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
-
- /* vp8_filter each macro block */
- for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
- {
- for (mb_col = 0; mb_col < mb_cols; mb_col++)
- {
- int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
- mode_info_context->mbmi.mode != SPLITMV &&
- mode_info_context->mbmi.mb_skip_coeff);
-
- const int mode_index =
- lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
- const int seg = mode_info_context->mbmi.segment_id;
- const int ref_frame = mode_info_context->mbmi.ref_frame;
-
- filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
-
- if (filter_level)
- {
- if (cm->filter_type == NORMAL_LOOPFILTER)
- {
- const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
- lfi.mblim = lfi_n->mblim[filter_level];
- lfi.blim = lfi_n->blim[filter_level];
- lfi.lim = lfi_n->lim[filter_level];
- lfi.hev_thr = lfi_n->hev_thr[hev_index];
-
- if (mb_col > 0)
- vp8_loop_filter_mbv
- (y_ptr, 0, 0, post->y_stride, 0, &lfi);
-
- if (!skip_lf)
- vp8_loop_filter_bv
- (y_ptr, 0, 0, post->y_stride, 0, &lfi);
-
- vp8_loop_filter_mbh
- (y_ptr, 0, 0, post->y_stride, 0, &lfi);
-
- if (!skip_lf)
- vp8_loop_filter_bh
- (y_ptr, 0, 0, post->y_stride, 0, &lfi);
- }
- else
- {
- if (mb_col > 0)
- vp8_loop_filter_simple_mbv
- (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
-
- if (!skip_lf)
- vp8_loop_filter_simple_bv
- (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
-
- vp8_loop_filter_simple_mbh
- (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
-
- if (!skip_lf)
- vp8_loop_filter_simple_bh
- (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
- }
- }
-
- y_ptr += 16;
- mode_info_context += 1; /* step to next MB */
- }
-
- y_ptr += post->y_stride * 16 - post->y_width;
- mode_info_context += 1; /* Skip border mb */
- }
-}
--- /dev/null
+++ b/vp8/common/vp8_loopfilter.c
@@ -0,0 +1,661 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#include "loopfilter.h"
+#include "onyxc_int.h"
+#include "vpx_mem/vpx_mem.h"
+
+
+static void lf_init_lut(loop_filter_info_n *lfi)
+{
+ int filt_lvl;
+
+ for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++)
+ {
+ if (filt_lvl >= 40)
+ {
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
+ }
+ else if (filt_lvl >= 20)
+ {
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
+ }
+ else if (filt_lvl >= 15)
+ {
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
+ }
+ else
+ {
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
+ }
+ }
+
+ lfi->mode_lf_lut[DC_PRED] = 1;
+ lfi->mode_lf_lut[V_PRED] = 1;
+ lfi->mode_lf_lut[H_PRED] = 1;
+ lfi->mode_lf_lut[TM_PRED] = 1;
+ lfi->mode_lf_lut[B_PRED] = 0;
+
+ lfi->mode_lf_lut[ZEROMV] = 1;
+ lfi->mode_lf_lut[NEARESTMV] = 2;
+ lfi->mode_lf_lut[NEARMV] = 2;
+ lfi->mode_lf_lut[NEWMV] = 2;
+ lfi->mode_lf_lut[SPLITMV] = 3;
+
+}
+
+void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
+ int sharpness_lvl)
+{
+ int i;
+
+ /* For each possible value for the loop filter fill out limits */
+ for (i = 0; i <= MAX_LOOP_FILTER; i++)
+ {
+ int filt_lvl = i;
+ int block_inside_limit = 0;
+
+        /* Set loop filter parameters that control sharpness. */
+ block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
+ block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);
+
+ if (sharpness_lvl > 0)
+ {
+ if (block_inside_limit > (9 - sharpness_lvl))
+ block_inside_limit = (9 - sharpness_lvl);
+ }
+
+ if (block_inside_limit < 1)
+ block_inside_limit = 1;
+
+ memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
+ memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH);
+ memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
+ SIMD_WIDTH);
+ }
+}
+
+void vp8_loop_filter_init(VP8_COMMON *cm)
+{
+ loop_filter_info_n *lfi = &cm->lf_info;
+ int i;
+
+ /* init limits for given sharpness*/
+ vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
+ cm->last_sharpness_level = cm->sharpness_level;
+
+ /* init LUT for lvl and hev thr picking */
+ lf_init_lut(lfi);
+
+ /* init hev threshold const vectors */
+ for(i = 0; i < 4 ; i++)
+ {
+ memset(lfi->hev_thr[i], i, SIMD_WIDTH);
+ }
+}
+
+void vp8_loop_filter_frame_init(VP8_COMMON *cm,
+ MACROBLOCKD *mbd,
+ int default_filt_lvl)
+{
+ int seg, /* segment number */
+ ref, /* index in ref_lf_deltas */
+ mode; /* index in mode_lf_deltas */
+
+ loop_filter_info_n *lfi = &cm->lf_info;
+
+ /* update limits if sharpness has changed */
+ if(cm->last_sharpness_level != cm->sharpness_level)
+ {
+ vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
+ cm->last_sharpness_level = cm->sharpness_level;
+ }
+
+ for(seg = 0; seg < MAX_MB_SEGMENTS; seg++)
+ {
+ int lvl_seg = default_filt_lvl;
+ int lvl_ref, lvl_mode;
+
+ /* Note the baseline filter values for each segment */
+ if (mbd->segmentation_enabled)
+ {
+ /* Abs value */
+ if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+ {
+ lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
+ }
+ else /* Delta Value */
+ {
+ lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
+ lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
+ }
+ }
+
+ if (!mbd->mode_ref_lf_delta_enabled)
+ {
+ /* we could get rid of this if we assume that deltas are set to
+ * zero when not in use; encoder always uses deltas
+ */
+ memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
+ continue;
+ }
+
+ /* INTRA_FRAME */
+ ref = INTRA_FRAME;
+
+ /* Apply delta for reference frame */
+ lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref];
+
+ /* Apply delta for Intra modes */
+ mode = 0; /* B_PRED */
+ /* Only the split mode BPRED has a further special case */
+ lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
+ /* clamp */
+ lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0;
+
+ lfi->lvl[seg][ref][mode] = lvl_mode;
+
+ mode = 1; /* all the rest of Intra modes */
+ /* clamp */
+ lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0;
+ lfi->lvl[seg][ref][mode] = lvl_mode;
+
+ /* LAST, GOLDEN, ALT */
+ for(ref = 1; ref < MAX_REF_FRAMES; ref++)
+ {
+ /* Apply delta for reference frame */
+ lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref];
+
+ /* Apply delta for Inter modes */
+ for (mode = 1; mode < 4; mode++)
+ {
+ lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
+ /* clamp */
+ lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0;
+
+ lfi->lvl[seg][ref][mode] = lvl_mode;
+ }
+ }
+ }
+}
+
+
+void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context,
+ int mb_row, int post_ystride, int post_uvstride,
+ unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr)
+{
+ int mb_col;
+ int filter_level;
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ loop_filter_info lfi;
+ FRAME_TYPE frame_type = cm->frame_type;
+
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+ {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
+
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+ if (filter_level)
+ {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ vp8_loop_filter_mbv
+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
+
+ if (!skip_lf)
+ vp8_loop_filter_bv
+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ vp8_loop_filter_mbh
+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
+
+ if (!skip_lf)
+ vp8_loop_filter_bh
+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
+ }
+
+ y_ptr += 16;
+ u_ptr += 8;
+ v_ptr += 8;
+
+ mode_info_context++; /* step to next MB */
+ }
+
+}
+
+void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context,
+ int mb_row, int post_ystride, int post_uvstride,
+ unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr)
+{
+ int mb_col;
+ int filter_level;
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ (void)post_uvstride;
+
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+ {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
+
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+ if (filter_level)
+ {
+ if (mb_col > 0)
+ vp8_loop_filter_simple_mbv
+ (y_ptr, post_ystride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp8_loop_filter_simple_bv
+ (y_ptr, post_ystride, lfi_n->blim[filter_level]);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ vp8_loop_filter_simple_mbh
+ (y_ptr, post_ystride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp8_loop_filter_simple_bh
+ (y_ptr, post_ystride, lfi_n->blim[filter_level]);
+ }
+
+ y_ptr += 16;
+ u_ptr += 8;
+ v_ptr += 8;
+
+ mode_info_context++; /* step to next MB */
+ }
+
+}
+void vp8_loop_filter_frame(VP8_COMMON *cm,
+ MACROBLOCKD *mbd,
+ int frame_type)
+{
+ YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ loop_filter_info lfi;
+
+ int mb_row;
+ int mb_col;
+ int mb_rows = cm->mb_rows;
+ int mb_cols = cm->mb_cols;
+
+ int filter_level;
+
+ unsigned char *y_ptr, *u_ptr, *v_ptr;
+
+ /* Point at base of Mb MODE_INFO list */
+ const MODE_INFO *mode_info_context = cm->mi;
+ int post_y_stride = post->y_stride;
+ int post_uv_stride = post->uv_stride;
+
+ /* Initialize the loop filter for this frame. */
+ vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
+
+ /* Set up the buffer pointers */
+ y_ptr = post->y_buffer;
+ u_ptr = post->u_buffer;
+ v_ptr = post->v_buffer;
+
+ /* vp8_filter each macro block */
+ if (cm->filter_type == NORMAL_LOOPFILTER)
+ {
+ for (mb_row = 0; mb_row < mb_rows; mb_row++)
+ {
+ for (mb_col = 0; mb_col < mb_cols; mb_col++)
+ {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
+
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+ if (filter_level)
+ {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ vp8_loop_filter_mbv
+ (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
+
+ if (!skip_lf)
+ vp8_loop_filter_bv
+ (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ vp8_loop_filter_mbh
+ (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
+
+ if (!skip_lf)
+ vp8_loop_filter_bh
+ (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
+ }
+
+ y_ptr += 16;
+ u_ptr += 8;
+ v_ptr += 8;
+
+ mode_info_context++; /* step to next MB */
+ }
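+            /* Step back to the left edge and down one MB (16 luma / 8 chroma lines). */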
+ y_ptr += post_y_stride * 16 - post->y_width;
+ u_ptr += post_uv_stride * 8 - post->uv_width;
+ v_ptr += post_uv_stride * 8 - post->uv_width;
+
+ mode_info_context++; /* Skip border mb */
+
+ }
+ }
+ else /* SIMPLE_LOOPFILTER */
+ {
+ for (mb_row = 0; mb_row < mb_rows; mb_row++)
+ {
+ for (mb_col = 0; mb_col < mb_cols; mb_col++)
+ {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
+
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+ if (filter_level)
+ {
+                    const unsigned char *mblim = lfi_n->mblim[filter_level];
+                    const unsigned char *blim = lfi_n->blim[filter_level];
+
+ if (mb_col > 0)
+ vp8_loop_filter_simple_mbv
+ (y_ptr, post_y_stride, mblim);
+
+ if (!skip_lf)
+ vp8_loop_filter_simple_bv
+ (y_ptr, post_y_stride, blim);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ vp8_loop_filter_simple_mbh
+ (y_ptr, post_y_stride, mblim);
+
+ if (!skip_lf)
+ vp8_loop_filter_simple_bh
+ (y_ptr, post_y_stride, blim);
+ }
+
+ y_ptr += 16;
+ u_ptr += 8;
+ v_ptr += 8;
+
+ mode_info_context++; /* step to next MB */
+ }
+ y_ptr += post_y_stride * 16 - post->y_width;
+ u_ptr += post_uv_stride * 8 - post->uv_width;
+ v_ptr += post_uv_stride * 8 - post->uv_width;
+
+ mode_info_context++; /* Skip border mb */
+
+ }
+ }
+}
+
+void vp8_loop_filter_frame_yonly
+(
+ VP8_COMMON *cm,
+ MACROBLOCKD *mbd,
+ int default_filt_lvl
+)
+{
+ YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+
+ unsigned char *y_ptr;
+ int mb_row;
+ int mb_col;
+
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ loop_filter_info lfi;
+
+ int filter_level;
+ FRAME_TYPE frame_type = cm->frame_type;
+
+ /* Point at base of Mb MODE_INFO list */
+ const MODE_INFO *mode_info_context = cm->mi;
+
+#if 0
+ if(default_filt_lvl == 0) /* no filter applied */
+ return;
+#endif
+
+ /* Initialize the loop filter for this frame. */
+    vp8_loop_filter_frame_init(cm, mbd, default_filt_lvl);
+
+ /* Set up the buffer pointers */
+ y_ptr = post->y_buffer;
+
+ /* vp8_filter each macro block */
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+ {
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+ {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
+
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+ if (filter_level)
+ {
+ if (cm->filter_type == NORMAL_LOOPFILTER)
+ {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
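+                    /* Chroma pointers are 0 here, so only luma is filtered. */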
+ if (mb_col > 0)
+ vp8_loop_filter_mbv
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf)
+ vp8_loop_filter_bv
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ vp8_loop_filter_mbh
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf)
+ vp8_loop_filter_bh
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+ }
+ else
+ {
+ if (mb_col > 0)
+ vp8_loop_filter_simple_mbv
+ (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp8_loop_filter_simple_bv
+ (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ vp8_loop_filter_simple_mbh
+ (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp8_loop_filter_simple_bh
+ (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
+ }
+ }
+
+ y_ptr += 16;
+            mode_info_context++; /* step to next MB */
+
+ }
+
+ y_ptr += post->y_stride * 16 - post->y_width;
+        mode_info_context++; /* Skip border mb */
+ }
+
+}
+
+void vp8_loop_filter_partial_frame
+(
+ VP8_COMMON *cm,
+ MACROBLOCKD *mbd,
+ int default_filt_lvl
+)
+{
+ YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+
+ unsigned char *y_ptr;
+ int mb_row;
+ int mb_col;
+ int mb_cols = post->y_width >> 4;
+ int mb_rows = post->y_height >> 4;
+
+ int linestocopy;
+
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ loop_filter_info lfi;
+
+ int filter_level;
+ FRAME_TYPE frame_type = cm->frame_type;
+
+ const MODE_INFO *mode_info_context;
+
+#if 0
+ if(default_filt_lvl == 0) /* no filter applied */
+ return;
+#endif
+
+ /* Initialize the loop filter for this frame. */
+    vp8_loop_filter_frame_init(cm, mbd, default_filt_lvl);
+
+ /* number of MB rows to use in partial filtering */
+ linestocopy = mb_rows / PARTIAL_FRAME_FRACTION;
+ linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
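+    /* i.e. about 1/PARTIAL_FRAME_FRACTION of the frame, but at least one MB row. */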
+
+ /* Set up the buffer pointers; partial image starts at ~middle of frame */
+ y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride;
+ mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
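+    /* cm->mi stride is mb_cols + 1: one extra border MB per row, skipped below. */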
+
+ /* vp8_filter each macro block */
+    for (mb_row = 0; mb_row < (linestocopy >> 4); mb_row++)
+ {
+ for (mb_col = 0; mb_col < mb_cols; mb_col++)
+ {
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index =
+ lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
+
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
+ if (filter_level)
+ {
+ if (cm->filter_type == NORMAL_LOOPFILTER)
+ {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ vp8_loop_filter_mbv
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf)
+ vp8_loop_filter_bv
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
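+                    /* The partial region starts mid-frame, so the top MB edge is always filtered. */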
+ vp8_loop_filter_mbh
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf)
+ vp8_loop_filter_bh
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+ }
+ else
+ {
+ if (mb_col > 0)
+ vp8_loop_filter_simple_mbv
+ (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp8_loop_filter_simple_bv
+ (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
+
+ vp8_loop_filter_simple_mbh
+ (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ vp8_loop_filter_simple_bh
+ (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
+ }
+ }
+
+ y_ptr += 16;
+            mode_info_context++; /* step to next MB */
+ }
+
+ y_ptr += post->y_stride * 16 - post->y_width;
+        mode_info_context++; /* Skip border mb */
+ }
+}
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -53,7 +53,7 @@
VP8_COMMON_SRCS-yes += common/systemdependent.h
VP8_COMMON_SRCS-yes += common/threading.h
VP8_COMMON_SRCS-yes += common/treecoder.h
-VP8_COMMON_SRCS-yes += common/loopfilter.c
+VP8_COMMON_SRCS-yes += common/vp8_loopfilter.c
VP8_COMMON_SRCS-yes += common/loopfilter_filters.c
VP8_COMMON_SRCS-yes += common/mbpitch.c
VP8_COMMON_SRCS-yes += common/modecont.c
@@ -161,7 +161,7 @@
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon.c
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfilter_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_loopfilter_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c