ref: c26a9ecaa21f41156afafee2576a2afed7d9e284
parent: 7cdcfee82ca6ff6460a7011ee0fb4b7fe824e5b9
author: Ronald S. Bultje <[email protected]>
date: Wed Sep 30 14:44:37 EDT 2015
vp8: change build_intra4x4_predictors() to use vpx_dsp. I've added a few new functions (d45e, d63e, he, ve) to cover the filtered h/v 4x4 predictors that are vp8-specific, the "correct" d45 with the correctly filtered bottom-right pixel (as opposed to the unfiltered version in vp9), and the "broken" d63 with weirdly filtered bottom-right pixels (which is correctly filtered in vp9). There may be a minor performance impact on all systems because we have to do an extra copy of the Above pixel array to incorporate the topleft pixel in the same array (thus fitting the vpx_dsp API). In addition, armv6 will have a more serious performance impact b/c I removed the armv6/vp8-specific assembly. I'm not sure anyone cares... Change-Id: I7f9e5ebee11d8e21aca2cd517a69eefc181b2e86
--- a/vp8/common/arm/armv6/intra4x4_predict_v6.asm
+++ /dev/null
@@ -1,611 +1,0 @@
-;
-; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_intra4x4_predict_armv6|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-
-;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft,
-; B_PREDICTION_MODE left_stride, int b_mode,
-; unsigned char *dst, int dst_stride,
-; unsigned char top_left)
-
-; r0: *Above
-; r1: *yleft
-; r2: left_stride
-; r3: b_mode
-; sp + #40: dst
-; sp + #44: dst_stride
-; sp + #48: top_left
-|vp8_intra4x4_predict_armv6| PROC
- push {r4-r12, lr}
-
- cmp r3, #10
- addlt pc, pc, r3, lsl #2 ; position independent switch
- pop {r4-r12, pc} ; default
- b b_dc_pred
- b b_tm_pred
- b b_ve_pred
- b b_he_pred
- b b_ld_pred
- b b_rd_pred
- b b_vr_pred
- b b_vl_pred
- b b_hd_pred
- b b_hu_pred
-
-b_dc_pred
- ; load values
- ldr r8, [r0] ; Above
- ldrb r4, [r1], r2 ; Left[0]
- mov r9, #0
- ldrb r5, [r1], r2 ; Left[1]
- ldrb r6, [r1], r2 ; Left[2]
- usad8 r12, r8, r9
- ldrb r7, [r1] ; Left[3]
-
- ; calculate dc
- add r4, r4, r5
- add r4, r4, r6
- add r4, r4, r7
- add r4, r4, r12
- add r4, r4, #4
- ldr r0, [sp, #44] ; dst_stride
- mov r12, r4, asr #3 ; (expected_dc + 4) >> 3
-
- add r12, r12, r12, lsl #8
- ldr r3, [sp, #40] ; dst
- add r12, r12, r12, lsl #16
-
- ; store values
- str r12, [r3], r0
- str r12, [r3], r0
- str r12, [r3], r0
- str r12, [r3]
-
- pop {r4-r12, pc}
-
-b_tm_pred
- ldr r8, [r0] ; Above
- ldrb r9, [sp, #48] ; top_left
- ldrb r4, [r1], r2 ; Left[0]
- ldrb r5, [r1], r2 ; Left[1]
- ldrb r6, [r1], r2 ; Left[2]
- ldrb r7, [r1] ; Left[3]
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- add r9, r9, r9, lsl #16 ; [tl|tl]
- uxtb16 r10, r8 ; a[2|0]
- uxtb16 r11, r8, ror #8 ; a[3|1]
- ssub16 r10, r10, r9 ; a[2|0] - [tl|tl]
- ssub16 r11, r11, r9 ; a[3|1] - [tl|tl]
-
- add r4, r4, r4, lsl #16 ; l[0|0]
- add r5, r5, r5, lsl #16 ; l[1|1]
- add r6, r6, r6, lsl #16 ; l[2|2]
- add r7, r7, r7, lsl #16 ; l[3|3]
-
- sadd16 r1, r4, r10 ; l[0|0] + a[2|0] - [tl|tl]
- sadd16 r2, r4, r11 ; l[0|0] + a[3|1] - [tl|tl]
- usat16 r1, #8, r1
- usat16 r2, #8, r2
-
- sadd16 r4, r5, r10 ; l[1|1] + a[2|0] - [tl|tl]
- sadd16 r5, r5, r11 ; l[1|1] + a[3|1] - [tl|tl]
-
- add r12, r1, r2, lsl #8 ; [3|2|1|0]
- str r12, [r3], r0
-
- usat16 r4, #8, r4
- usat16 r5, #8, r5
-
- sadd16 r1, r6, r10 ; l[2|2] + a[2|0] - [tl|tl]
- sadd16 r2, r6, r11 ; l[2|2] + a[3|1] - [tl|tl]
-
- add r12, r4, r5, lsl #8 ; [3|2|1|0]
- str r12, [r3], r0
-
- usat16 r1, #8, r1
- usat16 r2, #8, r2
-
- sadd16 r4, r7, r10 ; l[3|3] + a[2|0] - [tl|tl]
- sadd16 r5, r7, r11 ; l[3|3] + a[3|1] - [tl|tl]
-
- add r12, r1, r2, lsl #8 ; [3|2|1|0]
-
- usat16 r4, #8, r4
- usat16 r5, #8, r5
-
- str r12, [r3], r0
-
- add r12, r4, r5, lsl #8 ; [3|2|1|0]
- str r12, [r3]
-
- pop {r4-r12, pc}
-
-b_ve_pred
- ldr r8, [r0] ; a[3|2|1|0]
- ldr r11, c00FF00FF
- ldrb r9, [sp, #48] ; top_left
- ldrb r10, [r0, #4] ; a[4]
-
- ldr r0, c00020002
-
- uxtb16 r4, r8 ; a[2|0]
- uxtb16 r5, r8, ror #8 ; a[3|1]
- ldr r2, [sp, #44] ; dst_stride
- pkhbt r9, r9, r5, lsl #16 ; a[1|-1]
-
- add r9, r9, r4, lsl #1 ;[a[1]+2*a[2] | tl+2*a[0] ]
- uxtab16 r9, r9, r5 ;[a[1]+2*a[2]+a[3] | tl+2*a[0]+a[1] ]
- ldr r3, [sp, #40] ; dst
- uxtab16 r9, r9, r0 ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2]
-
- add r0, r0, r10, lsl #16 ;[a[4]+2 | 2]
- add r0, r0, r4, asr #16 ;[a[4]+2 | a[2]+2]
- add r0, r0, r5, lsl #1 ;[a[4]+2*a[3]+2 | a[2]+2*a[1]+2]
- uadd16 r4, r4, r0 ;[a[4]+2*a[3]+a[2]+2|a[2]+2*a[1]+a[0]+2]
-
- and r9, r11, r9, asr #2
- and r4, r11, r4, asr #2
- add r9, r9, r4, lsl #8
-
- ; store values
- str r9, [r3], r2
- str r9, [r3], r2
- str r9, [r3], r2
- str r9, [r3]
-
- pop {r4-r12, pc}
-
-
-b_he_pred
- ldrb r4, [r1], r2 ; Left[0]
- ldrb r8, [sp, #48] ; top_left
- ldrb r5, [r1], r2 ; Left[1]
- ldrb r6, [r1], r2 ; Left[2]
- ldrb r7, [r1] ; Left[3]
-
- add r8, r8, r4 ; tl + l[0]
- add r9, r4, r5 ; l[0] + l[1]
- add r10, r5, r6 ; l[1] + l[2]
- add r11, r6, r7 ; l[2] + l[3]
-
- mov r0, #2<<14
-
- add r8, r8, r9 ; tl + 2*l[0] + l[1]
- add r4, r9, r10 ; l[0] + 2*l[1] + l[2]
- add r5, r10, r11 ; l[1] + 2*l[2] + l[3]
- add r6, r11, r7, lsl #1 ; l[2] + 2*l[3] + l[3]
-
-
- add r8, r0, r8, lsl #14 ; (tl + 2*l[0] + l[1])>>2 in top half
- add r9, r0, r4, lsl #14 ; (l[0] + 2*l[1] + l[2])>>2 in top half
- add r10,r0, r5, lsl #14 ; (l[1] + 2*l[2] + l[3])>>2 in top half
- add r11,r0, r6, lsl #14 ; (l[2] + 2*l[3] + l[3])>>2 in top half
-
- pkhtb r8, r8, r8, asr #16 ; l[-|0|-|0]
- pkhtb r9, r9, r9, asr #16 ; l[-|1|-|1]
- pkhtb r10, r10, r10, asr #16 ; l[-|2|-|2]
- pkhtb r11, r11, r11, asr #16 ; l[-|3|-|3]
-
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- add r8, r8, r8, lsl #8 ; l[0|0|0|0]
- add r9, r9, r9, lsl #8 ; l[1|1|1|1]
- add r10, r10, r10, lsl #8 ; l[2|2|2|2]
- add r11, r11, r11, lsl #8 ; l[3|3|3|3]
-
- ; store values
- str r8, [r3], r0
- str r9, [r3], r0
- str r10, [r3], r0
- str r11, [r3]
-
- pop {r4-r12, pc}
-
-b_ld_pred
- ldr r4, [r0] ; Above[0-3]
- ldr r12, c00020002
- ldr r5, [r0, #4] ; Above[4-7]
- ldr lr, c00FF00FF
-
- uxtb16 r6, r4 ; a[2|0]
- uxtb16 r7, r4, ror #8 ; a[3|1]
- uxtb16 r8, r5 ; a[6|4]
- uxtb16 r9, r5, ror #8 ; a[7|5]
- pkhtb r10, r6, r8 ; a[2|4]
- pkhtb r11, r7, r9 ; a[3|5]
-
- add r4, r6, r7, lsl #1 ; [a2+2*a3 | a0+2*a1]
- add r4, r4, r10, ror #16 ; [a2+2*a3+a4 | a0+2*a1+a2]
- uxtab16 r4, r4, r12 ; [a2+2*a3+a4+2 | a0+2*a1+a2+2]
-
- add r5, r7, r10, ror #15 ; [a3+2*a4 | a1+2*a2]
- add r5, r5, r11, ror #16 ; [a3+2*a4+a5 | a1+2*a2+a3]
- uxtab16 r5, r5, r12 ; [a3+2*a4+a5+2 | a1+2*a2+a3+2]
-
- pkhtb r7, r9, r8, asr #16
- add r6, r8, r9, lsl #1 ; [a6+2*a7 | a4+2*a5]
- uadd16 r6, r6, r7 ; [a6+2*a7+a7 | a4+2*a5+a6]
- uxtab16 r6, r6, r12 ; [a6+2*a7+a7+2 | a4+2*a5+a6+2]
-
- uxth r7, r9 ; [ a5]
- add r7, r7, r8, asr #15 ; [ a5+2*a6]
- add r7, r7, r9, asr #16 ; [ a5+2*a6+a7]
- uxtah r7, r7, r12 ; [ a5+2*a6+a7+2]
-
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- ; scale down
- and r4, lr, r4, asr #2
- and r5, lr, r5, asr #2
- and r6, lr, r6, asr #2
- mov r7, r7, asr #2
-
- add r8, r4, r5, lsl #8 ; [3|2|1|0]
- str r8, [r3], r0
-
- mov r9, r8, lsr #8
- add r9, r9, r6, lsl #24 ; [4|3|2|1]
- str r9, [r3], r0
-
- mov r10, r9, lsr #8
- add r10, r10, r7, lsl #24 ; [5|4|3|2]
- str r10, [r3], r0
-
- mov r6, r6, lsr #16
- mov r11, r10, lsr #8
- add r11, r11, r6, lsl #24 ; [6|5|4|3]
- str r11, [r3]
-
- pop {r4-r12, pc}
-
-b_rd_pred
- ldrb r7, [r1], r2 ; l[0] = pp[3]
- ldr lr, [r0] ; Above = pp[8|7|6|5]
- ldrb r8, [sp, #48] ; tl = pp[4]
- ldrb r6, [r1], r2 ; l[1] = pp[2]
- ldrb r5, [r1], r2 ; l[2] = pp[1]
- ldrb r4, [r1], r2 ; l[3] = pp[0]
-
-
- uxtb16 r9, lr ; p[7|5]
- uxtb16 r10, lr, ror #8 ; p[8|6]
- add r4, r4, r6, lsl #16 ; p[2|0]
- add r5, r5, r7, lsl #16 ; p[3|1]
- add r6, r6, r8, lsl #16 ; p[4|2]
- pkhbt r7, r7, r9, lsl #16 ; p[5|3]
- pkhbt r8, r8, r10, lsl #16 ; p[6|4]
-
- ldr r12, c00020002
- ldr lr, c00FF00FF
-
- add r4, r4, r5, lsl #1 ; [p2+2*p3 | p0+2*p1]
- add r4, r4, r6 ; [p2+2*p3+p4 | p0+2*p1+p2]
- uxtab16 r4, r4, r12 ; [p2+2*p3+p4+2 | p0+2*p1+p2+2]
-
- add r5, r5, r6, lsl #1 ; [p3+2*p4 | p1+2*p2]
- add r5, r5, r7 ; [p3+2*p4+p5 | p1+2*p2+p3]
- uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
-
- add r6, r7, r8, lsl #1 ; [p5+2*p6 | p3+2*p4]
- add r6, r6, r9 ; [p5+2*p6+p7 | p3+2*p4+p5]
- uxtab16 r6, r6, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
-
- add r7, r8, r9, lsl #1 ; [p6+2*p7 | p4+2*p5]
- add r7, r7, r10 ; [p6+2*p7+p8 | p4+2*p5+p6]
- uxtab16 r7, r7, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
-
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- ; scale down
- and r7, lr, r7, asr #2
- and r6, lr, r6, asr #2
- and r5, lr, r5, asr #2
- and r4, lr, r4, asr #2
-
- add r8, r6, r7, lsl #8 ; [6|5|4|3]
- str r8, [r3], r0
-
- mov r9, r8, lsl #8 ; [5|4|3|-]
- uxtab r9, r9, r4, ror #16 ; [5|4|3|2]
- str r9, [r3], r0
-
- mov r10, r9, lsl #8 ; [4|3|2|-]
- uxtab r10, r10, r5 ; [4|3|2|1]
- str r10, [r3], r0
-
- mov r11, r10, lsl #8 ; [3|2|1|-]
- uxtab r11, r11, r4 ; [3|2|1|0]
- str r11, [r3]
-
- pop {r4-r12, pc}
-
-b_vr_pred
- ldrb r7, [r1], r2 ; l[0] = pp[3]
- ldr lr, [r0] ; Above = pp[8|7|6|5]
- ldrb r8, [sp, #48] ; tl = pp[4]
- ldrb r6, [r1], r2 ; l[1] = pp[2]
- ldrb r5, [r1], r2 ; l[2] = pp[1]
- ldrb r4, [r1] ; l[3] = pp[0]
-
- add r5, r5, r7, lsl #16 ; p[3|1]
- add r6, r6, r8, lsl #16 ; p[4|2]
- uxtb16 r9, lr ; p[7|5]
- uxtb16 r10, lr, ror #8 ; p[8|6]
- pkhbt r7, r7, r9, lsl #16 ; p[5|3]
- pkhbt r8, r8, r10, lsl #16 ; p[6|4]
-
- ldr r4, c00010001
- ldr r12, c00020002
- ldr lr, c00FF00FF
-
- add r5, r5, r6, lsl #1 ; [p3+2*p4 | p1+2*p2]
- add r5, r5, r7 ; [p3+2*p4+p5 | p1+2*p2+p3]
- uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
-
- add r6, r6, r7, lsl #1 ; [p4+2*p5 | p2+2*p3]
- add r6, r6, r8 ; [p4+2*p5+p6 | p2+2*p3+p4]
- uxtab16 r6, r6, r12 ; [p4+2*p5+p6+2 | p2+2*p3+p4+2]
-
- uadd16 r11, r8, r9 ; [p6+p7 | p4+p5]
- uhadd16 r11, r11, r4 ; [(p6+p7+1)>>1 | (p4+p5+1)>>1]
- ; [F|E]
-
- add r7, r7, r8, lsl #1 ; [p5+2*p6 | p3+2*p4]
- add r7, r7, r9 ; [p5+2*p6+p7 | p3+2*p4+p5]
- uxtab16 r7, r7, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
-
- uadd16 r2, r9, r10 ; [p7+p8 | p5+p6]
- uhadd16 r2, r2, r4 ; [(p7+p8+1)>>1 | (p5+p6+1)>>1]
- ; [J|I]
-
- add r8, r8, r9, lsl #1 ; [p6+2*p7 | p4+2*p5]
- add r8, r8, r10 ; [p6+2*p7+p8 | p4+2*p5+p6]
- uxtab16 r8, r8, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
-
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- ; scale down
- and r5, lr, r5, asr #2 ; [B|A]
- and r6, lr, r6, asr #2 ; [D|C]
- and r7, lr, r7, asr #2 ; [H|G]
- and r8, lr, r8, asr #2 ; [L|K]
-
- add r12, r11, r2, lsl #8 ; [J|F|I|E]
- str r12, [r3], r0
-
- add r12, r7, r8, lsl #8 ; [L|H|K|G]
- str r12, [r3], r0
-
- pkhbt r2, r6, r2, lsl #16 ; [-|I|-|C]
- add r2, r2, r11, lsl #8 ; [F|I|E|C]
-
- pkhtb r12, r6, r5 ; [-|D|-|A]
- pkhtb r10, r7, r5, asr #16 ; [-|H|-|B]
- str r2, [r3], r0
- add r12, r12, r10, lsl #8 ; [H|D|B|A]
- str r12, [r3]
-
- pop {r4-r12, pc}
-
-b_vl_pred
- ldr r4, [r0] ; [3|2|1|0] = Above[0-3]
- ldr r12, c00020002
- ldr r5, [r0, #4] ; [7|6|5|4] = Above[4-7]
- ldr lr, c00FF00FF
- ldr r2, c00010001
-
- mov r0, r4, lsr #16 ; [-|-|3|2]
- add r0, r0, r5, lsl #16 ; [5|4|3|2]
- uxtb16 r6, r4 ; [2|0]
- uxtb16 r7, r4, ror #8 ; [3|1]
- uxtb16 r8, r0 ; [4|2]
- uxtb16 r9, r0, ror #8 ; [5|3]
- uxtb16 r10, r5 ; [6|4]
- uxtb16 r11, r5, ror #8 ; [7|5]
-
- uadd16 r4, r6, r7 ; [p2+p3 | p0+p1]
- uhadd16 r4, r4, r2 ; [(p2+p3+1)>>1 | (p0+p1+1)>>1]
- ; [B|A]
-
- add r5, r6, r7, lsl #1 ; [p2+2*p3 | p0+2*p1]
- add r5, r5, r8 ; [p2+2*p3+p4 | p0+2*p1+p2]
- uxtab16 r5, r5, r12 ; [p2+2*p3+p4+2 | p0+2*p1+p2+2]
-
- uadd16 r6, r7, r8 ; [p3+p4 | p1+p2]
- uhadd16 r6, r6, r2 ; [(p3+p4+1)>>1 | (p1+p2+1)>>1]
- ; [F|E]
-
- add r7, r7, r8, lsl #1 ; [p3+2*p4 | p1+2*p2]
- add r7, r7, r9 ; [p3+2*p4+p5 | p1+2*p2+p3]
- uxtab16 r7, r7, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
-
- add r8, r8, r9, lsl #1 ; [p4+2*p5 | p2+2*p3]
- add r8, r8, r10 ; [p4+2*p5+p6 | p2+2*p3+p4]
- uxtab16 r8, r8, r12 ; [p4+2*p5+p6+2 | p2+2*p3+p4+2]
-
- add r9, r9, r10, lsl #1 ; [p5+2*p6 | p3+2*p4]
- add r9, r9, r11 ; [p5+2*p6+p7 | p3+2*p4+p5]
- uxtab16 r9, r9, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
-
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- ; scale down
- and r5, lr, r5, asr #2 ; [D|C]
- and r7, lr, r7, asr #2 ; [H|G]
- and r8, lr, r8, asr #2 ; [I|D]
- and r9, lr, r9, asr #2 ; [J|H]
-
- add r10, r4, r6, lsl #8 ; [F|B|E|A]
- str r10, [r3], r0
-
- add r5, r5, r7, lsl #8 ; [H|C|G|D]
- str r5, [r3], r0
-
- pkhtb r12, r8, r4, asr #16 ; [-|I|-|B]
- pkhtb r10, r9, r8 ; [-|J|-|D]
-
- add r12, r6, r12, lsl #8 ; [I|F|B|E]
- str r12, [r3], r0
-
- add r10, r7, r10, lsl #8 ; [J|H|D|G]
- str r10, [r3]
-
- pop {r4-r12, pc}
-
-b_hd_pred
- ldrb r7, [r1], r2 ; l[0] = pp[3]
- ldr lr, [r0] ; Above = pp[8|7|6|5]
- ldrb r8, [sp, #48] ; tl = pp[4]
- ldrb r6, [r1], r2 ; l[1] = pp[2]
- ldrb r5, [r1], r2 ; l[2] = pp[1]
- ldrb r4, [r1] ; l[3] = pp[0]
-
- uxtb16 r9, lr ; p[7|5]
- uxtb16 r10, lr, ror #8 ; p[8|6]
-
- add r4, r4, r5, lsl #16 ; p[1|0]
- add r5, r5, r6, lsl #16 ; p[2|1]
- add r6, r6, r7, lsl #16 ; p[3|2]
- add r7, r7, r8, lsl #16 ; p[4|3]
-
- ldr r12, c00020002
- ldr lr, c00FF00FF
- ldr r2, c00010001
-
- pkhtb r8, r7, r9 ; p[4|5]
- pkhtb r1, r9, r10 ; p[7|6]
- pkhbt r10, r8, r10, lsl #16 ; p[6|5]
-
- uadd16 r11, r4, r5 ; [p1+p2 | p0+p1]
- uhadd16 r11, r11, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
- ; [B|A]
-
- add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1]
- add r4, r4, r6 ; [p1+2*p2+p3 | p0+2*p1+p2]
- uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
-
- uadd16 r0, r6, r7 ; [p3+p4 | p2+p3]
- uhadd16 r0, r0, r2 ; [(p3+p4+1)>>1 | (p2+p3+1)>>1]
- ; [F|E]
-
- add r5, r6, r7, lsl #1 ; [p3+2*p4 | p2+2*p3]
- add r5, r5, r8, ror #16 ; [p3+2*p4+p5 | p2+2*p3+p4]
- uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p2+2*p3+p4+2]
-
- add r6, r12, r8, ror #16 ; [p5+2 | p4+2]
- add r6, r6, r10, lsl #1 ; [p5+2+2*p6 | p4+2+2*p5]
- uxtab16 r6, r6, r1 ; [p5+2+2*p6+p7 | p4+2+2*p5+p6]
-
- ; scale down
- and r4, lr, r4, asr #2 ; [D|C]
- and r5, lr, r5, asr #2 ; [H|G]
- and r6, lr, r6, asr #2 ; [J|I]
-
- ldr lr, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- pkhtb r2, r0, r6 ; [-|F|-|I]
- pkhtb r12, r6, r5, asr #16 ; [-|J|-|H]
- add r12, r12, r2, lsl #8 ; [F|J|I|H]
- add r2, r0, r5, lsl #8 ; [H|F|G|E]
- mov r12, r12, ror #24 ; [J|I|H|F]
- str r12, [r3], lr
-
- mov r7, r11, asr #16 ; [-|-|-|B]
- str r2, [r3], lr
- add r7, r7, r0, lsl #16 ; [-|E|-|B]
- add r7, r7, r4, asr #8 ; [-|E|D|B]
- add r7, r7, r5, lsl #24 ; [G|E|D|B]
- str r7, [r3], lr
-
- add r5, r11, r4, lsl #8 ; [D|B|C|A]
- str r5, [r3]
-
- pop {r4-r12, pc}
-
-
-
-b_hu_pred
- ldrb r4, [r1], r2 ; Left[0]
- ldr r12, c00020002
- ldrb r5, [r1], r2 ; Left[1]
- ldr lr, c00FF00FF
- ldrb r6, [r1], r2 ; Left[2]
- ldr r2, c00010001
- ldrb r7, [r1] ; Left[3]
-
- add r4, r4, r5, lsl #16 ; [1|0]
- add r5, r5, r6, lsl #16 ; [2|1]
- add r9, r6, r7, lsl #16 ; [3|2]
-
- uadd16 r8, r4, r5 ; [p1+p2 | p0+p1]
- uhadd16 r8, r8, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
- ; [B|A]
-
- add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1]
- add r4, r4, r9 ; [p1+2*p2+p3 | p0+2*p1+p2]
- uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
- ldr r2, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
- and r4, lr, r4, asr #2 ; [D|C]
-
- add r10, r6, r7 ; [p2+p3]
- add r11, r10, r7, lsl #1 ; [p2+3*p3]
- add r10, r10, #1
- add r11, r11, #2
- mov r10, r10, asr #1 ; [E]
- mov r11, r11, asr #2 ; [F]
-
- add r9, r7, r9, asr #8 ; [-|-|G|G]
- add r0, r8, r4, lsl #8 ; [D|B|C|A]
- add r7, r9, r9, lsl #16 ; [G|G|G|G]
-
- str r0, [r3], r2
-
- mov r1, r8, asr #16 ; [-|-|-|B]
- add r1, r1, r4, asr #8 ; [-|-|D|B]
- add r1, r1, r10, lsl #16 ; [-|E|D|B]
- add r1, r1, r11, lsl #24 ; [F|E|D|B]
- str r1, [r3], r2
-
- add r10, r11, lsl #8 ; [-|-|F|E]
- add r10, r10, r9, lsl #16 ; [G|G|F|E]
- str r10, [r3], r2
-
- str r7, [r3]
-
- pop {r4-r12, pc}
-
- ENDP
-
-; constants
-c00010001
- DCD 0x00010001
-c00020002
- DCD 0x00020002
-c00FF00FF
- DCD 0x00FF00FF
-
- END
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -16,6 +16,7 @@
#include "vpx_ports/vpx_once.h"
#include "blockd.h"
#include "vp8/common/reconintra.h"
+#include "vp8/common/reconintra4x4.h"
enum {
SIZE_16,
@@ -43,6 +44,7 @@
INIT_SIZE(16);
INIT_SIZE(8);
+ vp8_init_intra4x4_predictors_internal();
}
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -8,290 +8,47 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <string.h>
#include "vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "blockd.h"
-void vp8_intra4x4_predict_c(unsigned char *Above,
- unsigned char *yleft, int left_stride,
- int _b_mode,
- unsigned char *dst, int dst_stride,
- unsigned char top_left)
+typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, /* common signature of the vpx_dsp 4x4 predictors stored in pred[] */
+ const uint8_t *above, const uint8_t *left);
+
+static intra_pred_fn pred[10]; /* dispatch table indexed by B_PREDICTION_MODE; sized for the ten modes assigned below (assumes they are 0..9 - enum is defined elsewhere, confirm) */
+
+void vp8_init_intra4x4_predictors_internal(void) /* fills pred[]; entries stay NULL until this has run, so it must precede any vp8_intra4x4_predict() call */
{
- int i, r, c;
- B_PREDICTION_MODE b_mode = (B_PREDICTION_MODE)_b_mode;
+ pred[B_DC_PRED] = vpx_dc_predictor_4x4;
+ pred[B_TM_PRED] = vpx_tm_predictor_4x4;
+ pred[B_VE_PRED] = vpx_ve_predictor_4x4; /* filtered vertical predictor, reads above[-1] and above[4] */
+ pred[B_HE_PRED] = vpx_he_predictor_4x4; /* filtered horizontal predictor, reads above[-1] */
+ pred[B_LD_PRED] = vpx_d45e_predictor_4x4; /* d45 variant whose bottom-right pixel is filtered */
+ pred[B_RD_PRED] = vpx_d135_predictor_4x4;
+ pred[B_VR_PRED] = vpx_d117_predictor_4x4;
+ pred[B_VL_PRED] = vpx_d63e_predictor_4x4; /* d63 variant keeping the vp8 bottom-right filtering */
+ pred[B_HD_PRED] = vpx_d153_predictor_4x4;
+ pred[B_HU_PRED] = vpx_d207_predictor_4x4;
+}
+
+void vp8_intra4x4_predict(unsigned char *above, /* gathers the neighbours into vpx_dsp-style above/left arrays and dispatches on b_mode */
+ unsigned char *yleft, int left_stride,
+ B_PREDICTION_MODE b_mode,
+ unsigned char *dst, int dst_stride,
+ unsigned char top_left)
+{
unsigned char Left[4]; /* left column, gathered below into a contiguous array */
+ unsigned char Aboveb[12], *Above = Aboveb + 4; /* 4 bytes of headroom so Above[-1] is a valid slot, followed by 8 above pixels */
+
Left[0] = yleft[0];
Left[1] = yleft[left_stride];
Left[2] = yleft[2 * left_stride];
Left[3] = yleft[3 * left_stride];
+ memcpy(Above, above, 8); /* caller must supply 8 readable bytes; the d45e/d63e predictors read above[0..7] */
+ Above[-1] = top_left; /* the he/ve predictors read the top-left pixel as above[-1] */
- switch (b_mode)
- {
- case B_DC_PRED:
- {
- int expected_dc = 0;
-
- for (i = 0; i < 4; i++)
- {
- expected_dc += Above[i];
- expected_dc += Left[i];
- }
-
- expected_dc = (expected_dc + 4) >> 3;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- dst[c] = expected_dc;
- }
-
- dst += dst_stride;
- }
- }
- break;
- case B_TM_PRED:
- {
- /* prediction similar to true_motion prediction */
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- int pred = Above[c] - top_left + Left[r];
-
- if (pred < 0)
- pred = 0;
-
- if (pred > 255)
- pred = 255;
-
- dst[c] = pred;
- }
-
- dst += dst_stride;
- }
- }
- break;
-
- case B_VE_PRED:
- {
-
- unsigned int ap[4];
- ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2;
- ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2;
- ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2;
- ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
-
- dst[c] = ap[c];
- }
-
- dst += dst_stride;
- }
-
- }
- break;
-
-
- case B_HE_PRED:
- {
-
- unsigned int lp[4];
- lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2;
- lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2;
- lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2;
- lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- dst[c] = lp[r];
- }
-
- dst += dst_stride;
- }
- }
- break;
- case B_LD_PRED:
- {
- unsigned char *ptr = Above;
- dst[0 * dst_stride + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2;
- dst[0 * dst_stride + 1] =
- dst[1 * dst_stride + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2;
- dst[0 * dst_stride + 2] =
- dst[1 * dst_stride + 1] =
- dst[2 * dst_stride + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2;
- dst[0 * dst_stride + 3] =
- dst[1 * dst_stride + 2] =
- dst[2 * dst_stride + 1] =
- dst[3 * dst_stride + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2;
- dst[1 * dst_stride + 3] =
- dst[2 * dst_stride + 2] =
- dst[3 * dst_stride + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2;
- dst[2 * dst_stride + 3] =
- dst[3 * dst_stride + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2;
- dst[3 * dst_stride + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2;
-
- }
- break;
- case B_RD_PRED:
- {
-
- unsigned char pp[9];
-
- pp[0] = Left[3];
- pp[1] = Left[2];
- pp[2] = Left[1];
- pp[3] = Left[0];
- pp[4] = top_left;
- pp[5] = Above[0];
- pp[6] = Above[1];
- pp[7] = Above[2];
- pp[8] = Above[3];
-
- dst[3 * dst_stride + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- dst[3 * dst_stride + 1] =
- dst[2 * dst_stride + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- dst[3 * dst_stride + 2] =
- dst[2 * dst_stride + 1] =
- dst[1 * dst_stride + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- dst[3 * dst_stride + 3] =
- dst[2 * dst_stride + 2] =
- dst[1 * dst_stride + 1] =
- dst[0 * dst_stride + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- dst[2 * dst_stride + 3] =
- dst[1 * dst_stride + 2] =
- dst[0 * dst_stride + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- dst[1 * dst_stride + 3] =
- dst[0 * dst_stride + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- dst[0 * dst_stride + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
-
- }
- break;
- case B_VR_PRED:
- {
-
- unsigned char pp[9];
-
- pp[0] = Left[3];
- pp[1] = Left[2];
- pp[2] = Left[1];
- pp[3] = Left[0];
- pp[4] = top_left;
- pp[5] = Above[0];
- pp[6] = Above[1];
- pp[7] = Above[2];
- pp[8] = Above[3];
-
-
- dst[3 * dst_stride + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- dst[2 * dst_stride + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- dst[3 * dst_stride + 1] =
- dst[1 * dst_stride + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- dst[2 * dst_stride + 1] =
- dst[0 * dst_stride + 0] = (pp[4] + pp[5] + 1) >> 1;
- dst[3 * dst_stride + 2] =
- dst[1 * dst_stride + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- dst[2 * dst_stride + 2] =
- dst[0 * dst_stride + 1] = (pp[5] + pp[6] + 1) >> 1;
- dst[3 * dst_stride + 3] =
- dst[1 * dst_stride + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- dst[2 * dst_stride + 3] =
- dst[0 * dst_stride + 2] = (pp[6] + pp[7] + 1) >> 1;
- dst[1 * dst_stride + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
- dst[0 * dst_stride + 3] = (pp[7] + pp[8] + 1) >> 1;
-
- }
- break;
- case B_VL_PRED:
- {
-
- unsigned char *pp = Above;
-
- dst[0 * dst_stride + 0] = (pp[0] + pp[1] + 1) >> 1;
- dst[1 * dst_stride + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- dst[2 * dst_stride + 0] =
- dst[0 * dst_stride + 1] = (pp[1] + pp[2] + 1) >> 1;
- dst[1 * dst_stride + 1] =
- dst[3 * dst_stride + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- dst[2 * dst_stride + 1] =
- dst[0 * dst_stride + 2] = (pp[2] + pp[3] + 1) >> 1;
- dst[3 * dst_stride + 1] =
- dst[1 * dst_stride + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- dst[0 * dst_stride + 3] =
- dst[2 * dst_stride + 2] = (pp[3] + pp[4] + 1) >> 1;
- dst[1 * dst_stride + 3] =
- dst[3 * dst_stride + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- dst[2 * dst_stride + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- dst[3 * dst_stride + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- }
- break;
-
- case B_HD_PRED:
- {
- unsigned char pp[9];
- pp[0] = Left[3];
- pp[1] = Left[2];
- pp[2] = Left[1];
- pp[3] = Left[0];
- pp[4] = top_left;
- pp[5] = Above[0];
- pp[6] = Above[1];
- pp[7] = Above[2];
- pp[8] = Above[3];
-
-
- dst[3 * dst_stride + 0] = (pp[0] + pp[1] + 1) >> 1;
- dst[3 * dst_stride + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- dst[2 * dst_stride + 0] =
- dst[3 * dst_stride + 2] = (pp[1] + pp[2] + 1) >> 1;
- dst[2 * dst_stride + 1] =
- dst[3 * dst_stride + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- dst[2 * dst_stride + 2] =
- dst[1 * dst_stride + 0] = (pp[2] + pp[3] + 1) >> 1;
- dst[2 * dst_stride + 3] =
- dst[1 * dst_stride + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- dst[1 * dst_stride + 2] =
- dst[0 * dst_stride + 0] = (pp[3] + pp[4] + 1) >> 1;
- dst[1 * dst_stride + 3] =
- dst[0 * dst_stride + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- dst[0 * dst_stride + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- dst[0 * dst_stride + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- }
- break;
-
-
- case B_HU_PRED:
- {
- unsigned char *pp = Left;
- dst[0 * dst_stride + 0] = (pp[0] + pp[1] + 1) >> 1;
- dst[0 * dst_stride + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- dst[0 * dst_stride + 2] =
- dst[1 * dst_stride + 0] = (pp[1] + pp[2] + 1) >> 1;
- dst[0 * dst_stride + 3] =
- dst[1 * dst_stride + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- dst[1 * dst_stride + 2] =
- dst[2 * dst_stride + 0] = (pp[2] + pp[3] + 1) >> 1;
- dst[1 * dst_stride + 3] =
- dst[2 * dst_stride + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2;
- dst[2 * dst_stride + 2] =
- dst[2 * dst_stride + 3] =
- dst[3 * dst_stride + 0] =
- dst[3 * dst_stride + 1] =
- dst[3 * dst_stride + 2] =
- dst[3 * dst_stride + 3] = pp[3];
- }
- break;
-
- default:
- break;
-
- }
+ pred[b_mode](dst, dst_stride, Above, Left); /* NOTE(review): b_mode is not range-checked against pred[10]; the removed switch ignored unknown modes via default: break */
}
--- a/vp8/common/reconintra4x4.h
+++ b/vp8/common/reconintra4x4.h
@@ -18,7 +18,7 @@
#endif
static void intra_prediction_down_copy(MACROBLOCKD *xd,
- unsigned char *above_right_src)
+ unsigned char *above_right_src)
{
int dst_stride = xd->dst.y_stride;
unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16;
@@ -32,6 +32,14 @@
*dst_ptr1 = *src_ptr;
*dst_ptr2 = *src_ptr;
}
+
+void vp8_intra4x4_predict(unsigned char *Above,
+ unsigned char *yleft, int left_stride,
+ B_PREDICTION_MODE b_mode,
+ unsigned char *dst, int dst_stride,
+ unsigned char top_left);
+
+void vp8_init_intra4x4_predictors_internal(void);
#ifdef __cplusplus
} // extern "C"
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -152,10 +152,6 @@
$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
-add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left";
-specialize qw/vp8_intra4x4_predict media/;
-$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6;
-
#
# Postproc
#
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -145,7 +145,6 @@
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/loopfilter_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/simpleloopfilter_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/sixtappredict8x4_v6$(ASM)
-VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/intra4x4_predict_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequant_idct_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequantize_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_blk_v6.c
--- a/vpx_dsp/intrapred.c
+++ b/vpx_dsp/intrapred.c
@@ -247,6 +247,38 @@
}
}
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,  // filtered horizontal 4x4 predictor; vp8 dispatches B_HE_PRED to vpx_he_predictor_4x4
+ const uint8_t *above, const uint8_t *left) {  // reads above[-1] and left[0..3]; writes four 4-byte rows to dst
+ const int H = above[-1];  // top-left neighbour: the caller must make above[-1] readable
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int L = left[3];
+
+ memset(dst + stride * 0, AVG3(H, I, J), 4);  // AVG3 is defined outside this hunk; presumably the (a + 2*b + c + 2) >> 2 filter of the removed vp8 code - confirm
+ memset(dst + stride * 1, AVG3(I, J, K), 4);  // every row is one smoothed left pixel replicated across 4 columns
+ memset(dst + stride * 2, AVG3(J, K, L), 4);
+ memset(dst + stride * 3, AVG3(K, L, L), 4);  // L is used twice: nothing below left[3] is read
+}
+
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,  // filtered vertical 4x4 predictor; vp8 dispatches B_VE_PRED to vpx_ve_predictor_4x4
+ const uint8_t *above, const uint8_t *left) {  // NOTE(review): left is unused and, unlike d45e/d63e, not cast to void
+ const int H = above[-1];  // top-left neighbour: the caller must make above[-1] readable
+ const int I = above[0];
+ const int J = above[1];
+ const int K = above[2];
+ const int L = above[3];
+ const int M = above[4];  // one pixel past the four directly above the block
+
+ dst[0] = AVG3(H, I, J);  // build the smoothed top row once ...
+ dst[1] = AVG3(I, J, K);
+ dst[2] = AVG3(J, K, L);
+ dst[3] = AVG3(K, L, M);
+ memcpy(dst + stride * 1, dst, 4);  // ... then replicate row 0 into rows 1-3
+ memcpy(dst + stride * 2, dst, 4);
+ memcpy(dst + stride * 3, dst, 4);
+}
+
void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const int I = left[0];
@@ -287,6 +319,30 @@
DST(3, 3) = AVG3(E, F, G); // differs from vp8
}
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,  // vp8 flavour of the d63 predictor; vp8 dispatches B_VL_PRED to vpx_d63e_predictor_4x4
+ const uint8_t *above, const uint8_t *left) {  // reads above[0..7] only
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ const int E = above[4];
+ const int F = above[5];
+ const int G = above[6];
+ const int H = above[7];
+ (void)left;  // left column is not used by this mode
+ DST(0, 0) = AVG2(A, B);  // DST is defined outside this hunk; the assignments match the removed B_VL_PRED code if DST(x, y) is column x, row y
+ DST(1, 0) = DST(0, 2) = AVG2(B, C);
+ DST(2, 0) = DST(1, 2) = AVG2(C, D);
+ DST(3, 0) = DST(2, 2) = AVG2(D, E);
+ DST(3, 2) = AVG3(E, F, G);  // vp8-specific: a 3-tap value here instead of continuing the AVG2 sequence
+
+ DST(0, 1) = AVG3(A, B, C);
+ DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+ DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+ DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+ DST(3, 3) = AVG3(F, G, H);  // vp8-specific: skips AVG3(E, F, G), the value the neighbouring predictor marks "differs from vp8"
+}
+
void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const int A = above[0];
@@ -306,6 +362,27 @@
DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
DST(3, 3) = H; // differs from vp8
+}
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,  // d45 predictor with a filtered bottom-right pixel; vp8 dispatches B_LD_PRED to vpx_d45e_predictor_4x4
+ const uint8_t *above, const uint8_t *left) {  // reads above[0..7] only
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ const int E = above[4];
+ const int F = above[5];
+ const int G = above[6];
+ const int H = above[7];
+ (void)stride;  // NOTE(review): d63e uses the same DST() macro without this cast - confirm whether it is needed
+ (void)left;  // left column is not used by this mode
+ DST(0, 0) = AVG3(A, B, C);  // each anti-diagonal of the block shares one 3-tap filtered value
+ DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
+ DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
+ DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+ DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+ DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+ DST(3, 3) = AVG3(G, H, H);  // filtered with H repeated, where vpx_d45_predictor_4x4_c stores plain H
}
void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -60,12 +60,21 @@
add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc";
+add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_4x4/;
+
add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc";
+add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_4x4/;
+
add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
+add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_he_predictor_4x4/;
+
add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_4x4/;
@@ -77,6 +86,9 @@
add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc";
+
+add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_ve_predictor_4x4/;
add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";