ref: ad55b1d270db717a1f5c1c4966e7aecf9a563e5f
parent: 7b9c86167eb7161ab98cab66fab6e63a2c76c29e
parent: 1364cb58b4372c0f6f377c938f1eca789ffd120c
author: Johann Koenig <[email protected]>
date: Thu Sep 29 19:16:44 EDT 2016
Merge changes Ia3e9122f,Id33eb6c8,I956bd8ce * changes: Remove vp8_clear_system_state vpx_dsp: clean up rtcd vp8: clean up rtcd
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -83,8 +83,6 @@
}
#endif
-void vp8_clear_system_state_c(){};
-
void vp8_machine_specific_config(VP8_COMMON *ctx) {
#if CONFIG_MULTITHREAD
ctx->processor_core_count = get_cpu_count();
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -12,6 +12,7 @@
#include "vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "vpx_dsp/postproc.h"
+#include "vpx_ports/system_state.h"
#include "vpx_scale_rtcd.h"
#include "vpx_scale/yv12config.h"
#include "postproc.h"
@@ -321,7 +322,7 @@
}
}
- vp8_clear_system_state();
+ vpx_clear_system_state();
if ((flags & VP8D_MFQE) && oci->postproc_state.last_frame_valid &&
oci->current_video_frame >= 2 &&
@@ -363,7 +364,7 @@
oci->postproc_state.last_noise != noise_level) {
double sigma;
struct postproc_state *ppstate = &oci->postproc_state;
- vp8_clear_system_state();
+ vpx_clear_system_state();
sigma = noise_level + .5 + .6 * q / 63.0;
ppstate->clamp =
vpx_setup_noise(sigma, ppstate->generated_noise, oci->Width + 256);
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -19,13 +19,6 @@
forward_decls qw/vp8_common_forward_decls/;
#
-# system state
-#
-add_proto qw/void vp8_clear_system_state/, "";
-specialize qw/vp8_clear_system_state mmx/;
-$vp8_clear_system_state_mmx=vpx_reset_mmx_state;
-
-#
# Dequant
#
add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
@@ -33,15 +26,12 @@
add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
specialize qw/vp8_dequant_idct_add mmx neon dspr2 msa/;
-$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
specialize qw/vp8_dequant_idct_add_y_block mmx sse2 neon dspr2 msa/;
-$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 neon dspr2 msa/;
-$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
#
# Loopfilter
@@ -48,19 +38,15 @@
#
add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_mbv mmx sse2 neon dspr2 msa/;
-$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_bv mmx sse2 neon dspr2 msa/;
-$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_mbh mmx sse2 neon dspr2 msa/;
-$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;
add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
specialize qw/vp8_loop_filter_bh mmx sse2 neon dspr2 msa/;
-$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;
add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
@@ -101,23 +87,18 @@
#idct16
add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
specialize qw/vp8_short_idct4x4llm mmx neon dspr2 msa/;
-$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2;
#iwalsh1
add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;
-$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2;
-# no asm yet
#iwalsh16
add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
specialize qw/vp8_short_inv_walsh4x4 mmx sse2 neon dspr2 msa/;
-$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2;
#idct1_scalar_add
add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa/;
-$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;
#
# RECON
@@ -124,15 +105,12 @@
#
add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem16x16 mmx sse2 neon dspr2 msa/;
-$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2;
add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem8x8 mmx neon dspr2 msa/;
-$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2;
add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
specialize qw/vp8_copy_mem8x4 mmx neon dspr2 msa/;
-$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
#
# Postproc
@@ -140,13 +118,10 @@
if (vpx_config("CONFIG_POSTPROC") eq "yes") {
add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
- # no asm yet
add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
- # no asm yet
add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
- # no asm yet
add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
specialize qw/vp8_filter_by_weight16x16 sse2 msa/;
@@ -155,7 +130,6 @@
specialize qw/vp8_filter_by_weight8x8 sse2 msa/;
add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
- # no asm yet
}
#
@@ -163,19 +137,15 @@
#
add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 neon dspr2 msa/;
-$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2;
add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 neon dspr2 msa/;
-$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2;
add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 neon dspr2 msa/;
-$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 neon dspr2 msa/;
-$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 neon msa/;
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -29,6 +29,7 @@
#include "./vpx_scale_rtcd.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/systemdependent.h"
+#include "vpx_ports/system_state.h"
#include "vpx_ports/vpx_once.h"
#include "vpx_ports/vpx_timer.h"
#include "detokenize.h"
@@ -352,7 +353,7 @@
goto decode_exit;
}
- vp8_clear_system_state();
+ vpx_clear_system_state();
if (cm->show_frame) {
cm->current_video_frame++;
@@ -383,7 +384,7 @@
decode_exit:
pbi->common.error.setjmp = 0;
- vp8_clear_system_state();
+ vpx_clear_system_state();
return retcode;
}
int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd,
@@ -416,7 +417,7 @@
}
#endif /*!CONFIG_POSTPROC*/
- vp8_clear_system_state();
+ vpx_clear_system_state();
return ret;
}
@@ -447,7 +448,7 @@
if (setjmp(fb->pbi[0]->common.error.jmp)) {
vp8_remove_decoder_instances(fb);
memset(fb->pbi, 0, sizeof(fb->pbi) / sizeof(fb->pbi[0]));
- vp8_clear_system_state();
+ vpx_clear_system_state();
return VPX_CODEC_ERROR;
}
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -19,6 +19,7 @@
#include <limits.h>
#include "vpx/vpx_encoder.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/system_state.h"
#include "bitstream.h"
#include "defaultcoefcounts.h"
@@ -843,7 +844,7 @@
int new_intra, new_last, new_garf, oldtotal, newtotal;
int ref_frame_cost[MAX_REF_FRAMES];
- vp8_clear_system_state();
+ vpx_clear_system_state();
if (cpi->common.frame_type != KEY_FRAME) {
if (!(new_intra = rf_intra * 255 / (rf_intra + rf_inter))) new_intra = 1;
@@ -908,7 +909,7 @@
#endif
int savings = 0;
- vp8_clear_system_state();
+ vpx_clear_system_state();
do {
int j = 0;
@@ -1295,7 +1296,7 @@
#endif
- vp8_clear_system_state();
+ vpx_clear_system_state();
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
pack_coef_probs(cpi);
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -12,6 +12,7 @@
#include "encodemv.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/systemdependent.h"
+#include "vpx_ports/system_state.h"
#include <math.h>
@@ -126,7 +127,7 @@
unsigned int cost0 = 0;
unsigned int cost1 = 0;
- vp8_clear_system_state();
+ vpx_clear_system_state();
i = 1;
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -26,6 +26,7 @@
#include "vpx_scale/vpx_scale.h"
#include "encodemb.h"
#include "vp8/common/extend.h"
+#include "vpx_ports/system_state.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/swapyv12buffer.h"
#include "rdopt.h"
@@ -499,7 +500,7 @@
zero_ref_mv.as_int = 0;
- vp8_clear_system_state();
+ vpx_clear_system_state();
x->src = *cpi->Source;
xd->pre = *lst_yv12;
@@ -741,10 +742,10 @@
/* extend the recon for intra prediction */
vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8,
xd->dst.v_buffer + 8);
- vp8_clear_system_state();
+ vpx_clear_system_state();
}
- vp8_clear_system_state();
+ vpx_clear_system_state();
{
double weight = 0.0;
@@ -1655,7 +1656,7 @@
cpi->twopass.gf_group_bits = 0;
cpi->twopass.gf_decay_rate = 0;
- vp8_clear_system_state();
+ vpx_clear_system_state();
start_pos = cpi->twopass.stats_in;
@@ -2268,7 +2269,7 @@
return;
}
- vp8_clear_system_state();
+ vpx_clear_system_state();
if (EOF == input_stats(cpi, &this_frame)) return;
@@ -2543,7 +2544,7 @@
memset(&next_frame, 0, sizeof(next_frame));
- vp8_clear_system_state();
+ vpx_clear_system_state();
start_position = cpi->twopass.stats_in;
cpi->common.frame_type = KEY_FRAME;
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -33,6 +33,7 @@
#include "vp8/common/reconintra.h"
#include "vp8/common/swapyv12buffer.h"
#include "vp8/common/threading.h"
+#include "vpx_ports/system_state.h"
#include "vpx_ports/vpx_timer.h"
#if ARCH_ARM
#include "vpx_ports/arm.h"
@@ -2296,7 +2297,7 @@
recon += recon_stride;
}
- vp8_clear_system_state();
+ vpx_clear_system_state();
return total_sse;
}
@@ -2691,7 +2692,7 @@
if (cpi->Speed > 11) return 0;
/* Clear down mmx registers */
- vp8_clear_system_state();
+ vpx_clear_system_state();
if ((cpi->compressor_speed == 2) && (cpi->Speed >= 5) && (cpi->sf.RD == 0)) {
double change = 1.0 *
@@ -3129,7 +3130,7 @@
} else {
struct vpx_usec_timer timer;
- vp8_clear_system_state();
+ vpx_clear_system_state();
vpx_usec_timer_start(&timer);
if (cpi->sf.auto_filter == 0) {
@@ -3217,7 +3218,7 @@
int drop_mark25 = drop_mark / 8;
/* Clear down mmx registers to allow floating point in what follows */
- vp8_clear_system_state();
+ vpx_clear_system_state();
if (cpi->force_next_frame_intra) {
cm->frame_type = KEY_FRAME; /* delayed intra frame */
@@ -3576,7 +3577,7 @@
* There is some odd behavior for one pass here that needs attention.
*/
if ((cpi->pass == 2) || (cpi->ni_frames > 150)) {
- vp8_clear_system_state();
+ vpx_clear_system_state();
Q = cpi->active_worst_quality;
@@ -3802,7 +3803,7 @@
#endif
do {
- vp8_clear_system_state();
+ vpx_clear_system_state();
vp8_set_quantizer(cpi, Q);
@@ -3935,7 +3936,7 @@
cpi->projected_frame_size =
(cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;
#endif
- vp8_clear_system_state();
+ vpx_clear_system_state();
/* Test to see if the stats generated for this frame indicate that
* we should have coded a key frame (assuming that we didn't)!
@@ -3979,7 +3980,7 @@
#endif
}
- vp8_clear_system_state();
+ vpx_clear_system_state();
if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
@@ -4549,7 +4550,7 @@
{
FILE *f = fopen("tmp.stt", "a");
- vp8_clear_system_state();
+ vpx_clear_system_state();
if (cpi->twopass.total_left_stats.coded_error != 0.0)
fprintf(f, "%10d %10d %10d %10d %10d %10"PRId64" %10"PRId64
@@ -4779,7 +4780,7 @@
if (setjmp(cpi->common.error.jmp)) {
cpi->common.error.setjmp = 0;
- vp8_clear_system_state();
+ vpx_clear_system_state();
return VPX_CODEC_CORRUPT_FRAME;
}
@@ -4986,7 +4987,7 @@
*size = 0;
/* Clear down mmx registers */
- vp8_clear_system_state();
+ vpx_clear_system_state();
cm->frame_type = INTER_FRAME;
cm->frame_flags = *frame_flags;
@@ -5139,7 +5140,7 @@
vp8_deblock(cm, cm->frame_to_show, &cm->post_proc_buffer,
cm->filter_level * 10 / 6, 1, 0);
- vp8_clear_system_state();
+ vpx_clear_system_state();
ye = calc_plane_error(orig->y_buffer, orig->y_stride, pp->y_buffer,
pp->y_stride, y_width, y_height);
@@ -5249,7 +5250,7 @@
}
#endif
- vp8_clear_system_state();
+ vpx_clear_system_state();
return ret;
}
}
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -22,6 +22,7 @@
#include "vp8/common/systemdependent.h"
#include "encodemv.h"
#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_ports/system_state.h"
#define MIN_BPB_FACTOR 0.01
#define MAX_BPB_FACTOR 50
@@ -296,7 +297,7 @@
uint64_t target;
/* Clear down mmx registers to allow floating point in what follows */
- vp8_clear_system_state();
+ vpx_clear_system_state();
if (cpi->oxcf.fixed_q >= 0) {
int Q = cpi->oxcf.key_q;
@@ -1019,7 +1020,7 @@
int projected_size_based_on_q = 0;
/* Clear down mmx registers to allow floating point in what follows */
- vp8_clear_system_state();
+ vpx_clear_system_state();
if (cpi->common.frame_type == KEY_FRAME) {
rate_correction_factor = cpi->key_frame_rate_correction_factor;
@@ -1302,7 +1303,7 @@
void vp8_adjust_key_frame_context(VP8_COMP *cpi) {
/* Clear down mmx registers to allow floating point in what follows */
- vp8_clear_system_state();
+ vpx_clear_system_state();
/* Do we have any key frame overspend to recover? */
/* Two-pass overspend handled elsewhere. */
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -30,6 +30,7 @@
#include "encodemb.h"
#include "vp8/encoder/quantize.h"
#include "vpx_dsp/variance.h"
+#include "vpx_ports/system_state.h"
#include "mcomp.h"
#include "rdopt.h"
#include "vpx_mem/vpx_mem.h"
@@ -163,7 +164,7 @@
double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
double rdconst = 2.80;
- vp8_clear_system_state();
+ vpx_clear_system_state();
/* Further tests required to see if optimum is different
* for key frames, golden frames and arf frames.
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -24,6 +24,7 @@
#include "decoder/onyxd_int.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/system_state.h"
#if CONFIG_ERROR_CONCEALMENT
#include "decoder/error_concealment.h"
#endif
@@ -365,7 +366,7 @@
* reallocation is attempted on resync. */
ctx->si.w = 0;
ctx->si.h = 0;
- vp8_clear_system_state();
+ vpx_clear_system_state();
/* same return value as used in vp8dx_receive_compressed_data */
return -1;
}
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -11,12 +11,6 @@
}
forward_decls qw/vpx_dsp_forward_decls/;
-# optimizations which depend on multiple features
-$avx2_ssse3 = '';
-if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
- $avx2_ssse3 = 'avx2';
-}
-
# functions that are 64 bit only.
$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
if ($opts{arch} eq "x86_64") {
@@ -437,13 +431,13 @@
specialize qw/vpx_convolve_avg neon dspr2 msa sse2/;
add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
+specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa/;
add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
+specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa/;
add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
+specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa/;
add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa/;