ref: 67cde46dd75ca273e96f64bd77e2c94fd406ece8
parent: e05f4cf8f4a261c96ef50df12fe2635503e14094
author: James Zern <[email protected]>
date: Fri Mar 24 15:24:39 EDT 2017
intrapred: specialize highbd 4x4 predictors d207/d63/d45/d117/d135/d153 ~9-45% better depending on the predictor on 32-bit ARM, similar range on x86-64 this matches the non-highbitdepth implementation BUG=webm:1316 Change-Id: Iddebdf7c58c6f31c47cae04da95c6e5318200e4c
--- a/vpx_dsp/intrapred.c
+++ b/vpx_dsp/intrapred.c
@@ -690,6 +690,144 @@
dst += stride;
}
}
+
+void vpx_highbd_d207_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int L = left[3];
+ (void)above;
+ (void)bd;
+ DST(0, 0) = AVG2(I, J);
+ DST(2, 0) = DST(0, 1) = AVG2(J, K);
+ DST(2, 1) = DST(0, 2) = AVG2(K, L);
+ DST(1, 0) = AVG3(I, J, K);
+ DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+ DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+ DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+}
+
+void vpx_highbd_d63_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd) {
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ const int E = above[4];
+ const int F = above[5];
+ const int G = above[6];
+ (void)left;
+ (void)bd;
+ DST(0, 0) = AVG2(A, B);
+ DST(1, 0) = DST(0, 2) = AVG2(B, C);
+ DST(2, 0) = DST(1, 2) = AVG2(C, D);
+ DST(3, 0) = DST(2, 2) = AVG2(D, E);
+ DST(3, 2) = AVG2(E, F); // differs from vp8
+
+ DST(0, 1) = AVG3(A, B, C);
+ DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+ DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+ DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+ DST(3, 3) = AVG3(E, F, G); // differs from vp8
+}
+
+void vpx_highbd_d45_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd) {
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ const int E = above[4];
+ const int F = above[5];
+ const int G = above[6];
+ const int H = above[7];
+ (void)left;
+ (void)bd;
+ DST(0, 0) = AVG3(A, B, C);
+ DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
+ DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
+ DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+ DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+ DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+ DST(3, 3) = H; // differs from vp8
+}
+
+void vpx_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int X = above[-1];
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ (void)bd;
+ DST(0, 0) = DST(1, 2) = AVG2(X, A);
+ DST(1, 0) = DST(2, 2) = AVG2(A, B);
+ DST(2, 0) = DST(3, 2) = AVG2(B, C);
+ DST(3, 0) = AVG2(C, D);
+
+ DST(0, 3) = AVG3(K, J, I);
+ DST(0, 2) = AVG3(J, I, X);
+ DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
+ DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
+ DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
+ DST(3, 1) = AVG3(B, C, D);
+}
+
+void vpx_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int L = left[3];
+ const int X = above[-1];
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ (void)bd;
+ DST(0, 3) = AVG3(J, K, L);
+ DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
+ DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
+ DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
+ DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
+ DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
+ DST(3, 0) = AVG3(D, C, B);
+}
+
+void vpx_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int L = left[3];
+ const int X = above[-1];
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ (void)bd;
+
+ DST(0, 0) = DST(2, 1) = AVG2(I, X);
+ DST(0, 1) = DST(2, 2) = AVG2(J, I);
+ DST(0, 2) = DST(2, 3) = AVG2(K, J);
+ DST(0, 3) = AVG2(L, K);
+
+ DST(3, 0) = AVG3(A, B, C);
+ DST(2, 0) = AVG3(X, A, B);
+ DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+ DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+ DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+ DST(1, 3) = AVG3(L, K, J);
+}
#endif // CONFIG_VP9_HIGHBITDEPTH
// This serves as a wrapper function, so that all the prediction functions
@@ -725,7 +863,6 @@
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
intra_pred_sized(type, 32) \
- intra_pred_highbd_sized(type, 4) \
intra_pred_highbd_sized(type, 8) \
intra_pred_highbd_sized(type, 16) \
intra_pred_highbd_sized(type, 32)