ref: a10a5cb3567a0cfcea73c2e3765bc86d427646e3
parent: 5599e4275ae73b428ea8d993ba539f9f57de516d
parent: 0178d974e529c0e7592ccd1be926d91b8935eb0d
author: Linfeng Zhang <[email protected]>
date: Wed May 3 15:17:55 EDT 2017
Merge changes I8bb660de,Ica51d780,I6037525d * changes: Clean specializes of idct functions Clean add_protos of highbd idct functions Clean add_protos of idct functions
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -563,234 +563,162 @@
#
# Inverse transform
if (vpx_config("CONFIG_VP9") eq "yes") {
-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- # Note as optimized versions of these functions are added we need to add a check to ensure
- # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
- add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_iwht4x4_16_add sse2/;
+add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- specialize qw/vpx_highbd_idct4x4_1_add neon/;
+add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- specialize qw/vpx_highbd_idct8x8_1_add neon/;
+add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- specialize qw/vpx_highbd_idct16x16_1_add neon/;
+add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- specialize qw/vpx_highbd_idct32x32_1_add neon sse2/;
+add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- # Force C versions if CONFIG_EMULATE_HARDWARE is 1
- if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
- add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
+ specialize qw/vpx_idct4x4_16_add neon sse2/;
- add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct4x4_1_add neon sse2/;
- add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/;
- add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/;
- add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct8x8_1_add neon sse2/;
- add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct16x16_256_add neon sse2/;
- add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct16x16_38_add neon sse2/;
+ $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2;
- add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ specialize qw/vpx_idct16x16_10_add neon sse2/;
- add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ specialize qw/vpx_idct16x16_1_add neon sse2/;
- add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/;
- add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/;
+ $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
- add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/;
- add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ specialize qw/vpx_idct32x32_1_add neon sse2/;
+} # !CONFIG_EMULATE_HARDWARE
- add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ # Note as optimized versions of these functions are added we need to add a check to ensure
+ # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
+ specialize qw/vpx_iwht4x4_16_add sse2/;
- add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- } else {
- add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct4x4_16_add neon sse2/;
+ add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ specialize qw/vpx_highbd_idct4x4_1_add neon/;
- add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct4x4_1_add neon sse2/;
+ add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/;
+ add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/;
+ add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ specialize qw/vpx_highbd_idct8x8_1_add neon/;
- add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct8x8_1_add neon sse2/;
+ add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct16x16_256_add neon sse2/;
+ add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct16x16_38_add neon sse2/;
- $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2;
+ add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct16x16_10_add neon sse2/;
+ add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ specialize qw/vpx_highbd_idct16x16_1_add neon/;
- add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct16x16_1_add neon sse2/;
+ add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/;
+ add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/;
- # Need to add 135 eob idct32x32 implementations.
- $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
+ add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/;
+ add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ specialize qw/vpx_highbd_idct32x32_1_add neon sse2/;
- add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct32x32_1_add neon sse2/;
+ add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
- add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+
+ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vpx_highbd_idct4x4_16_add neon sse2/;
- add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct8x8_64_add neon sse2/;
- add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct8x8_12_add neon sse2/;
- add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct16x16_256_add neon sse2/;
- add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct16x16_38_add neon sse2/;
$vpx_highbd_idct16x16_38_add_sse2=vpx_highbd_idct16x16_256_add_sse2;
- add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct16x16_10_add neon sse2/;
- add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct32x32_1024_add neon/;
- add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct32x32_135_add neon/;
- add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct32x32_34_add neon/;
- } # CONFIG_EMULATE_HARDWARE
+ } # !CONFIG_EMULATE_HARDWARE
} else {
- # Force C versions if CONFIG_EMULATE_HARDWARE is 1
- if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
- add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
+ specialize qw/vpx_idct4x4_16_add dspr2 msa/;
- add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct4x4_1_add dspr2 msa/;
- add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct8x8_64_add dspr2 msa/;
- add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct8x8_12_add dspr2 msa/;
- add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct8x8_1_add dspr2 msa/;
- add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
+ specialize qw/vpx_idct16x16_256_add dspr2 msa/;
- add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-
- add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-
- add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-
- add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-
- add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-
- add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-
- add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-
- add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-
- add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- } else {
- add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/;
-
- add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct4x4_16_add sse2 neon dspr2 msa/;
-
- add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/;
-
- add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct8x8_64_add sse2 ssse3 neon dspr2 msa/;
-
- add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct8x8_12_add sse2 ssse3 neon dspr2 msa/;
-
- add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/;
-
- add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/;
-
- add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct16x16_38_add sse2 neon dspr2 msa/;
- $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2;
+ specialize qw/vpx_idct16x16_38_add dspr2 msa/;
$vpx_idct16x16_38_add_dspr2=vpx_idct16x16_256_add_dspr2;
$vpx_idct16x16_38_add_msa=vpx_idct16x16_256_add_msa;
- add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/;
+ specialize qw/vpx_idct16x16_10_add dspr2 msa/;
- add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct32x32_1024_add sse2 ssse3 neon dspr2 msa/;
+ specialize qw/vpx_idct16x16_1_add dspr2 msa/;
- add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct32x32_135_add sse2 ssse3 neon dspr2 msa/;
- $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
+ specialize qw/vpx_idct32x32_1024_add dspr2 msa/;
+
+ specialize qw/vpx_idct32x32_135_add dspr2 msa/;
$vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2;
$vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa;
- add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct32x32_34_add sse2 ssse3 neon dspr2 msa/;
+ specialize qw/vpx_idct32x32_34_add dspr2 msa/;
- add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/;
+ specialize qw/vpx_idct32x32_1_add dspr2 msa/;
- add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
- specialize qw/vpx_iwht4x4_1_add msa/;
-
- add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_iwht4x4_16_add msa sse2/;
- } # CONFIG_EMULATE_HARDWARE
+
+ specialize qw/vpx_iwht4x4_1_add msa/;
+ } # !CONFIG_EMULATE_HARDWARE
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9