ref: 601f3a886e4f643783fd5fe824dcb9b79e251111
parent: ffaebfc7b420e3966c0b66575282d39aac2522ac
author: Yaowu Xu <[email protected]>
date: Tue Sep 16 06:46:18 EDT 2014
Fix a performance regression. This commit adds back the sse2 or ssse3 optimized versions of a couple of functions, which fixes a ~10% performance regression. Change-Id: I049786906e5a641224dced63c6492aec9d86d183
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -838,7 +838,7 @@
specialize qw/vp9_quantize_b_32x32/;
} else {
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/vp9_block_error avx2/;
+ specialize qw/vp9_block_error avx2/, "$sse2_x86inc";
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp neon/, "$ssse3_x86_64";
@@ -850,7 +850,7 @@
specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b_32x32/;
+ specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
}
#