ref: 9d251f9510765a4ff0ed01689dc5c9985c42ec10
parent: ea5450b2802178ed5b8298f89d7e760000058d59
parent: 0ede9f52b796b6d8e02046b24f68a3db8b9f5920
author: Jingning Han <[email protected]>
date: Tue Jul 7 16:42:18 EDT 2015
Merge "Unify subtract function used in VP8/9"
--- a/test/subtract_test.cc
+++ /dev/null
@@ -1,123 +1,0 @@
-/*
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
-#include "vp8/common/blockd.h"
-#include "vp8/encoder/block.h"
-#include "vpx_mem/vpx_mem.h"
-
-typedef void (*SubtractBlockFunc)(BLOCK *be, BLOCKD *bd, int pitch);
-
-namespace {
-
-class SubtractBlockTest : public ::testing::TestWithParam<SubtractBlockFunc> {
- public:
- virtual void TearDown() {
- libvpx_test::ClearSystemState();
- }
-};
-
-using libvpx_test::ACMRandom;
-
-TEST_P(SubtractBlockTest, SimpleSubtract) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- BLOCK be;
- BLOCKD bd;
- // in libvpx, this stride is always 16
- const int kDiffPredStride = 16;
- const int kSrcStride[] = {32, 16, 8, 4, 0};
- const int kBlockWidth = 4;
- const int kBlockHeight = 4;
-
- // Allocate... align to 16 for mmx/sse tests
- uint8_t *source = reinterpret_cast<uint8_t*>(
- vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source)));
- be.src_diff = reinterpret_cast<int16_t*>(
- vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff)));
- bd.predictor = reinterpret_cast<unsigned char*>(
- vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));
-
- for (int i = 0; kSrcStride[i] > 0; ++i) {
- // start at block0
- be.src = 0;
- be.base_src = &source;
- be.src_stride = kSrcStride[i];
-
- // set difference
- int16_t *src_diff = be.src_diff;
- for (int r = 0; r < kBlockHeight; ++r) {
- for (int c = 0; c < kBlockWidth; ++c) {
- src_diff[c] = static_cast<int16_t>(0xa5a5u);
- }
- src_diff += kDiffPredStride;
- }
-
- // set destination
- uint8_t *base_src = *be.base_src;
- for (int r = 0; r < kBlockHeight; ++r) {
- for (int c = 0; c < kBlockWidth; ++c) {
- base_src[c] = rnd.Rand8();
- }
- base_src += be.src_stride;
- }
-
- // set predictor
- uint8_t *predictor = bd.predictor;
- for (int r = 0; r < kBlockHeight; ++r) {
- for (int c = 0; c < kBlockWidth; ++c) {
- predictor[c] = rnd.Rand8();
- }
- predictor += kDiffPredStride;
- }
-
- ASM_REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));
-
- base_src = *be.base_src;
- src_diff = be.src_diff;
- predictor = bd.predictor;
- for (int r = 0; r < kBlockHeight; ++r) {
- for (int c = 0; c < kBlockWidth; ++c) {
- EXPECT_EQ(base_src[c], (src_diff[c] + predictor[c])) << "r = " << r
- << ", c = " << c;
- }
- src_diff += kDiffPredStride;
- predictor += kDiffPredStride;
- base_src += be.src_stride;
- }
- }
- vpx_free(be.src_diff);
- vpx_free(source);
- vpx_free(bd.predictor);
-}
-
-INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest,
- ::testing::Values(vp8_subtract_b_c));
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest,
- ::testing::Values(vp8_subtract_b_neon));
-#endif
-
-#if HAVE_MMX
-INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest,
- ::testing::Values(vp8_subtract_b_mmx));
-#endif
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, SubtractBlockTest,
- ::testing::Values(vp8_subtract_b_sse2));
-#endif
-
-} // namespace
--- a/test/test.mk
+++ b/test/test.mk
@@ -104,7 +104,6 @@
LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -343,15 +343,6 @@
specialize qw/vp8_mbuverror mmx sse2/;
$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
-add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
-specialize qw/vp8_subtract_b mmx sse2 neon/;
-
-add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
-specialize qw/vp8_subtract_mby mmx sse2 neon/;
-
-add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
-specialize qw/vp8_subtract_mbuv mmx sse2 neon/;
-
#
# Motion search
#
--- a/vp8/encoder/arm/neon/subtract_neon.c
+++ /dev/null
@@ -1,154 +1,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-#include "vp8/encoder/block.h"
-
-void vp8_subtract_b_neon(
- BLOCK *be,
- BLOCKD *bd,
- int pitch) {
- unsigned char *src_ptr, *predictor;
- int src_stride;
- int16_t *src_diff;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
- uint16x8_t q10u16, q11u16, q12u16, q13u16;
-
- src_ptr = *be->base_src + be->src;
- src_stride = be->src_stride;
- predictor = bd->predictor;
-
- d0u8 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d2u8 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d4u8 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d6u8 = vld1_u8(src_ptr);
-
- d1u8 = vld1_u8(predictor);
- predictor += pitch;
- d3u8 = vld1_u8(predictor);
- predictor += pitch;
- d5u8 = vld1_u8(predictor);
- predictor += pitch;
- d7u8 = vld1_u8(predictor);
-
- q10u16 = vsubl_u8(d0u8, d1u8);
- q11u16 = vsubl_u8(d2u8, d3u8);
- q12u16 = vsubl_u8(d4u8, d5u8);
- q13u16 = vsubl_u8(d6u8, d7u8);
-
- src_diff = be->src_diff;
- vst1_u16((uint16_t *)src_diff, vget_low_u16(q10u16));
- src_diff += pitch;
- vst1_u16((uint16_t *)src_diff, vget_low_u16(q11u16));
- src_diff += pitch;
- vst1_u16((uint16_t *)src_diff, vget_low_u16(q12u16));
- src_diff += pitch;
- vst1_u16((uint16_t *)src_diff, vget_low_u16(q13u16));
- return;
-}
-
-void vp8_subtract_mby_neon(
- int16_t *diff,
- unsigned char *src,
- int src_stride,
- unsigned char *pred,
- int pred_stride) {
- int i;
- uint8x16_t q0u8, q1u8, q2u8, q3u8;
- uint16x8_t q8u16, q9u16, q10u16, q11u16;
-
- for (i = 0; i < 8; i++) { // subtract_mby_loop
- q0u8 = vld1q_u8(src);
- src += src_stride;
- q2u8 = vld1q_u8(src);
- src += src_stride;
- q1u8 = vld1q_u8(pred);
- pred += pred_stride;
- q3u8 = vld1q_u8(pred);
- pred += pred_stride;
-
- q8u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q1u8));
- q9u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q1u8));
- q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q3u8));
- q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q3u8));
-
- vst1q_u16((uint16_t *)diff, q8u16);
- diff += 8;
- vst1q_u16((uint16_t *)diff, q9u16);
- diff += 8;
- vst1q_u16((uint16_t *)diff, q10u16);
- diff += 8;
- vst1q_u16((uint16_t *)diff, q11u16);
- diff += 8;
- }
- return;
-}
-
-void vp8_subtract_mbuv_neon(
- int16_t *diff,
- unsigned char *usrc,
- unsigned char *vsrc,
- int src_stride,
- unsigned char *upred,
- unsigned char *vpred,
- int pred_stride) {
- int i, j;
- unsigned char *src_ptr, *pred_ptr;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
- uint16x8_t q8u16, q9u16, q10u16, q11u16;
-
- diff += 256;
- for (i = 0; i < 2; i++) {
- if (i == 0) {
- src_ptr = usrc;
- pred_ptr = upred;
- } else if (i == 1) {
- src_ptr = vsrc;
- pred_ptr = vpred;
- }
-
- for (j = 0; j < 2; j++) {
- d0u8 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d1u8 = vld1_u8(pred_ptr);
- pred_ptr += pred_stride;
- d2u8 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d3u8 = vld1_u8(pred_ptr);
- pred_ptr += pred_stride;
- d4u8 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d5u8 = vld1_u8(pred_ptr);
- pred_ptr += pred_stride;
- d6u8 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d7u8 = vld1_u8(pred_ptr);
- pred_ptr += pred_stride;
-
- q8u16 = vsubl_u8(d0u8, d1u8);
- q9u16 = vsubl_u8(d2u8, d3u8);
- q10u16 = vsubl_u8(d4u8, d5u8);
- q11u16 = vsubl_u8(d6u8, d7u8);
-
- vst1q_u16((uint16_t *)diff, q8u16);
- diff += 8;
- vst1q_u16((uint16_t *)diff, q9u16);
- diff += 8;
- vst1q_u16((uint16_t *)diff, q10u16);
- diff += 8;
- vst1q_u16((uint16_t *)diff, q11u16);
- diff += 8;
- }
- }
- return;
-}
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vpx_dsp_rtcd.h"
#include "vpx_config.h"
#include "vp8_rtcd.h"
@@ -19,80 +20,29 @@
#include "vpx_mem/vpx_mem.h"
#include "rdopt.h"
-// TODO(jingning,johannkoenig): use vpx_subtract_block to replace
-// codec specified vp9_subtract_ functions.
-void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
-{
- unsigned char *src_ptr = (*(be->base_src) + be->src);
- short *diff_ptr = be->src_diff;
- unsigned char *pred_ptr = bd->predictor;
- int src_stride = be->src_stride;
+void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch) {
+ unsigned char *src_ptr = (*(be->base_src) + be->src);
+ short *diff_ptr = be->src_diff;
+ unsigned char *pred_ptr = bd->predictor;
+ int src_stride = be->src_stride;
- int r, c;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- diff_ptr[c] = src_ptr[c] - pred_ptr[c];
- }
-
- diff_ptr += pitch;
- pred_ptr += pitch;
- src_ptr += src_stride;
- }
+ vpx_subtract_block(4, 4, diff_ptr, pitch, src_ptr, src_stride,
+ pred_ptr, pitch);
}
-void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
+void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc,
int src_stride, unsigned char *upred,
- unsigned char *vpred, int pred_stride)
-{
- short *udiff = diff + 256;
- short *vdiff = diff + 320;
+ unsigned char *vpred, int pred_stride) {
+ short *udiff = diff + 256;
+ short *vdiff = diff + 320;
- int r, c;
-
- for (r = 0; r < 8; r++)
- {
- for (c = 0; c < 8; c++)
- {
- udiff[c] = usrc[c] - upred[c];
- }
-
- udiff += 8;
- upred += pred_stride;
- usrc += src_stride;
- }
-
- for (r = 0; r < 8; r++)
- {
- for (c = 0; c < 8; c++)
- {
- vdiff[c] = vsrc[c] - vpred[c];
- }
-
- vdiff += 8;
- vpred += pred_stride;
- vsrc += src_stride;
- }
+ vpx_subtract_block(8, 8, udiff, 8, usrc, src_stride, upred, pred_stride);
+ vpx_subtract_block(8, 8, vdiff, 8, vsrc, src_stride, vpred, pred_stride);
}
-void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride,
- unsigned char *pred, int pred_stride)
-{
- int r, c;
-
- for (r = 0; r < 16; r++)
- {
- for (c = 0; c < 16; c++)
- {
- diff[c] = src[c] - pred[c];
- }
-
- diff += 16;
- pred += pred_stride;
- src += src_stride;
- }
+void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride,
+ unsigned char *pred, int pred_stride) {
+ vpx_subtract_block(16, 16, diff, 16, src, src_stride, pred, pred_stride);
}
static void vp8_subtract_mb(MACROBLOCK *x)
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -19,6 +19,13 @@
#endif
void vp8_encode_inter16x16(MACROBLOCK *x);
+void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch);
+void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc,
+ int src_stride, unsigned char *upred,
+ unsigned char *vpred, int pred_stride);
+void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride,
+ unsigned char *pred, int pred_stride);
+
void vp8_build_dcblock(MACROBLOCK *b);
void vp8_transform_mb(MACROBLOCK *mb);
void vp8_transform_mbuv(MACROBLOCK *x);
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ /dev/null
@@ -1,223 +1,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
-; short *diff, unsigned char *Predictor,
-; int pitch);
-global sym(vp8_subtract_b_mmx_impl) PRIVATE
-sym(vp8_subtract_b_mmx_impl):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
-
- mov rdi, arg(2) ;diff
- mov rax, arg(3) ;Predictor
- mov rsi, arg(0) ;z
- movsxd rdx, dword ptr arg(1);src_stride;
- movsxd rcx, dword ptr arg(4);pitch
- pxor mm7, mm7
-
- movd mm0, [rsi]
- movd mm1, [rax]
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- psubw mm0, mm1
- movq [rdi], mm0
-
-
- movd mm0, [rsi+rdx]
- movd mm1, [rax+rcx]
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- psubw mm0, mm1
- movq [rdi+rcx*2],mm0
-
-
- movd mm0, [rsi+rdx*2]
- movd mm1, [rax+rcx*2]
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- psubw mm0, mm1
- movq [rdi+rcx*4], mm0
-
- lea rsi, [rsi+rdx*2]
- lea rcx, [rcx+rcx*2]
-
-
-
- movd mm0, [rsi+rdx]
- movd mm1, [rax+rcx]
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- psubw mm0, mm1
- movq [rdi+rcx*2], mm0
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
-;unsigned char *pred, int pred_stride)
-global sym(vp8_subtract_mby_mmx) PRIVATE
-sym(vp8_subtract_mby_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
- mov rdi, arg(0) ;diff
- mov rsi, arg(1) ;src
- movsxd rdx, dword ptr arg(2);src_stride
- mov rax, arg(3) ;pred
- push rbx
- movsxd rbx, dword ptr arg(4);pred_stride
-
- pxor mm0, mm0
- mov rcx, 16
-
-
-.submby_loop:
- movq mm1, [rsi]
- movq mm3, [rax]
-
- movq mm2, mm1
- movq mm4, mm3
-
- punpcklbw mm1, mm0
- punpcklbw mm3, mm0
-
- punpckhbw mm2, mm0
- punpckhbw mm4, mm0
-
- psubw mm1, mm3
- psubw mm2, mm4
-
- movq [rdi], mm1
- movq [rdi+8], mm2
-
- movq mm1, [rsi+8]
- movq mm3, [rax+8]
-
- movq mm2, mm1
- movq mm4, mm3
-
- punpcklbw mm1, mm0
- punpcklbw mm3, mm0
-
- punpckhbw mm2, mm0
- punpckhbw mm4, mm0
-
- psubw mm1, mm3
- psubw mm2, mm4
-
- movq [rdi+16], mm1
- movq [rdi+24], mm2
- add rdi, 32
- lea rax, [rax+rbx]
- lea rsi, [rsi+rdx]
- dec rcx
- jnz .submby_loop
-
- pop rbx
- pop rdi
- pop rsi
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc,
-; int src_stride, unsigned char *upred,
-; unsigned char *vpred, int pred_stride)
-
-global sym(vp8_subtract_mbuv_mmx) PRIVATE
-sym(vp8_subtract_mbuv_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- push rsi
- push rdi
- ; end prolog
-
- mov rdi, arg(0) ;diff
- mov rsi, arg(1) ;usrc
- movsxd rdx, dword ptr arg(3);src_stride;
- mov rax, arg(4) ;upred
- add rdi, 256*2 ;diff = diff + 256 (shorts)
- mov rcx, 8
- push rbx
- movsxd rbx, dword ptr arg(6);pred_stride
-
- pxor mm7, mm7
-
-.submbu_loop:
- movq mm0, [rsi]
- movq mm1, [rax]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi], mm0
- movq [rdi+8], mm3
- add rdi, 16
- add rsi, rdx
- add rax, rbx
-
- dec rcx
- jnz .submbu_loop
-
- mov rsi, arg(2) ;vsrc
- mov rax, arg(5) ;vpred
- mov rcx, 8
-
-.submbv_loop:
- movq mm0, [rsi]
- movq mm1, [rax]
- movq mm3, mm0
- movq mm4, mm1
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- punpckhbw mm3, mm7
- punpckhbw mm4, mm7
- psubw mm0, mm1
- psubw mm3, mm4
- movq [rdi], mm0
- movq [rdi+8], mm3
- add rdi, 16
- add rsi, rdx
- add rax, rbx
-
- dec rcx
- jnz .submbv_loop
-
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
--- a/vp8/encoder/x86/subtract_sse2.asm
+++ /dev/null
@@ -1,245 +1,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
-; short *diff, unsigned char *Predictor,
-; int pitch);
-global sym(vp8_subtract_b_sse2_impl) PRIVATE
-sym(vp8_subtract_b_sse2_impl):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rdi, arg(2) ;diff
- mov rax, arg(3) ;Predictor
- mov rsi, arg(0) ;z
- movsxd rdx, dword ptr arg(1);src_stride;
- movsxd rcx, dword ptr arg(4);pitch
- pxor mm7, mm7
-
- movd mm0, [rsi]
- movd mm1, [rax]
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- psubw mm0, mm1
- movq MMWORD PTR [rdi], mm0
-
- movd mm0, [rsi+rdx]
- movd mm1, [rax+rcx]
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- psubw mm0, mm1
- movq MMWORD PTR [rdi+rcx*2], mm0
-
- movd mm0, [rsi+rdx*2]
- movd mm1, [rax+rcx*2]
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- psubw mm0, mm1
- movq MMWORD PTR [rdi+rcx*4], mm0
-
- lea rsi, [rsi+rdx*2]
- lea rcx, [rcx+rcx*2]
-
- movd mm0, [rsi+rdx]
- movd mm1, [rax+rcx]
- punpcklbw mm0, mm7
- punpcklbw mm1, mm7
- psubw mm0, mm1
- movq MMWORD PTR [rdi+rcx*2], mm0
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
-;unsigned char *pred, int pred_stride)
-global sym(vp8_subtract_mby_sse2) PRIVATE
-sym(vp8_subtract_mby_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rdi, arg(0) ;diff
- mov rsi, arg(1) ;src
- movsxd rdx, dword ptr arg(2);src_stride
- mov rax, arg(3) ;pred
- movdqa xmm4, [GLOBAL(t80)]
- push rbx
- mov rcx, 8 ; do two lines at one time
- movsxd rbx, dword ptr arg(4);pred_stride
-
-.submby_loop:
- movdqa xmm0, [rsi] ; src
- movdqa xmm1, [rax] ; pred
-
- movdqa xmm2, xmm0
- psubb xmm0, xmm1
-
- pxor xmm1, xmm4 ;convert to signed values
- pxor xmm2, xmm4
- pcmpgtb xmm1, xmm2 ; obtain sign information
-
- movdqa xmm2, xmm0
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm1 ; put sign back to subtraction
-
- movdqa xmm3, [rsi + rdx]
- movdqa xmm5, [rax + rbx]
-
- lea rsi, [rsi+rdx*2]
- lea rax, [rax+rbx*2]
-
- movdqa [rdi], xmm0
- movdqa [rdi +16], xmm2
-
- movdqa xmm1, xmm3
- psubb xmm3, xmm5
-
- pxor xmm5, xmm4 ;convert to signed values
- pxor xmm1, xmm4
- pcmpgtb xmm5, xmm1 ; obtain sign information
-
- movdqa xmm1, xmm3
- punpcklbw xmm3, xmm5 ; put sign back to subtraction
- punpckhbw xmm1, xmm5 ; put sign back to subtraction
-
- movdqa [rdi +32], xmm3
- movdqa [rdi +48], xmm1
-
- add rdi, 64
- dec rcx
- jnz .submby_loop
-
- pop rbx
- pop rdi
- pop rsi
- ; begin epilog
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
-; int src_stride, unsigned char *upred,
-; unsigned char *vpred, int pred_stride)
-global sym(vp8_subtract_mbuv_sse2) PRIVATE
-sym(vp8_subtract_mbuv_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movdqa xmm4, [GLOBAL(t80)]
- mov rdi, arg(0) ;diff
- mov rsi, arg(1) ;usrc
- movsxd rdx, dword ptr arg(3);src_stride;
- mov rax, arg(4) ;upred
- add rdi, 256*2 ;diff = diff + 256 (shorts)
- mov rcx, 4
- push rbx
- movsxd rbx, dword ptr arg(6);pred_stride
-
- ;u
-.submbu_loop:
- movq xmm0, [rsi] ; src
- movq xmm2, [rsi+rdx] ; src -- next line
- movq xmm1, [rax] ; pred
- movq xmm3, [rax+rbx] ; pred -- next line
- lea rsi, [rsi + rdx*2]
- lea rax, [rax + rbx*2]
-
- punpcklqdq xmm0, xmm2
- punpcklqdq xmm1, xmm3
-
- movdqa xmm2, xmm0
- psubb xmm0, xmm1 ; subtraction with sign missed
-
- pxor xmm1, xmm4 ;convert to signed values
- pxor xmm2, xmm4
- pcmpgtb xmm1, xmm2 ; obtain sign information
-
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
-
- movdqa [rdi], xmm0 ; store difference
- movdqa [rdi +16], xmm2 ; store difference
- add rdi, 32
- sub rcx, 1
- jnz .submbu_loop
-
- mov rsi, arg(2) ;vsrc
- mov rax, arg(5) ;vpred
- mov rcx, 4
-
- ;v
-.submbv_loop:
- movq xmm0, [rsi] ; src
- movq xmm2, [rsi+rdx] ; src -- next line
- movq xmm1, [rax] ; pred
- movq xmm3, [rax+rbx] ; pred -- next line
- lea rsi, [rsi + rdx*2]
- lea rax, [rax + rbx*2]
-
- punpcklqdq xmm0, xmm2
- punpcklqdq xmm1, xmm3
-
- movdqa xmm2, xmm0
- psubb xmm0, xmm1 ; subtraction with sign missed
-
- pxor xmm1, xmm4 ;convert to signed values
- pxor xmm2, xmm4
- pcmpgtb xmm1, xmm2 ; obtain sign information
-
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- punpcklbw xmm0, xmm1 ; put sign back to subtraction
- punpckhbw xmm2, xmm3 ; put sign back to subtraction
-
- movdqa [rdi], xmm0 ; store difference
- movdqa [rdi +16], xmm2 ; store difference
- add rdi, 32
- sub rcx, 1
- jnz .submbv_loop
-
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-t80:
- times 16 db 0x80
--- a/vp8/encoder/x86/vp8_enc_stubs_mmx.c
+++ b/vp8/encoder/x86/vp8_enc_stubs_mmx.c
@@ -65,14 +65,3 @@
return vp8_mbuverror_mmx_impl(s_ptr, d_ptr);
}
-void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
- short *diff, unsigned char *predictor,
- int pitch);
-void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
-{
- unsigned char *z = *(be->base_src) + be->src;
- unsigned int src_stride = be->src_stride;
- short *diff = &be->src_diff[0];
- unsigned char *predictor = &bd->predictor[0];
- vp8_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch);
-}
--- a/vp8/encoder/x86/vp8_enc_stubs_sse2.c
+++ b/vp8/encoder/x86/vp8_enc_stubs_sse2.c
@@ -30,14 +30,3 @@
return vp8_mbuverror_xmm_impl(s_ptr, d_ptr);
}
-void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
- short *diff, unsigned char *predictor,
- int pitch);
-void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
-{
- unsigned char *z = *(be->base_src) + be->src;
- unsigned int src_stride = be->src_stride;
- short *diff = &be->src_diff[0];
- unsigned char *predictor = &bd->predictor[0];
- vp8_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch);
-}
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -82,7 +82,6 @@
endif
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm
-VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
@@ -94,7 +93,6 @@
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
endif
-VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -25,5 +25,4 @@
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/fastquantizeb_neon.c
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/shortfdct_neon.c
-VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/subtract_neon.c
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_shortwalsh4x4_neon.c