ref: 9f6eba419a876598aa7edaee9e99610fcbf6362b
parent: 643c75d90b71af8fc636367cb8552b3ae9ee141d
author: Frank Galligan <[email protected]>
date: Thu Jan 15 14:29:46 EST 2015
Add Neon intrinsic vp9_fdct8x8_quant_neon On Nexus 7 speed -5 got ~2%, -6 got ~15%, -7 and -8 got ~30% increase in perf. Tested on Nexus 7, built with ndk r10d, gcc 4.9. Change-Id: I83246d63b96674d170098a572fa4fe28a05aaf51
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1166,7 +1166,7 @@
specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_fdct8x8_quant sse2 ssse3/;
+ specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
}
#
--- a/vp9/encoder/arm/neon/vp9_dct_neon.c
+++ b/vp9/encoder/arm/neon/vp9_dct_neon.c
@@ -32,6 +32,24 @@
}
}
+void vp9_fdct8x8_quant_neon(const int16_t *input, int stride,
+ int16_t* coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t* zbin_ptr,
+ const int16_t* round_ptr, const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr, uint16_t* eob_ptr,
+ const int16_t* scan_ptr,
+ const int16_t* iscan_ptr) {
+ int16_t temp_buffer[64];
+ (void)coeff_ptr;
+
+ vp9_fdct8x8_neon(input, temp_buffer, stride);
+ vp9_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
+ dequant_ptr, eob_ptr, scan_ptr, iscan_ptr);
+}
+
void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
int i;
// stage 1