ref: 2ce80f0c348a6c73de33cea757ec3fe69ee55611
parent: 679382ed05057bd64dafa7776a37dc5be05e57ce
author: Gregory Maxwell <[email protected]>
date: Wed Jul 25 13:32:05 EDT 2012
Change the runtime CPU type detection into a CPU mismatch warning. Getting C compiler support for building with SSE enabled, but without the compiler peppering the rest of the code with SSE instructions, is a huge burden. So instead, just use the detection to (hopefully) give a helpful warning.
--- a/Makefile.am
+++ b/Makefile.am
@@ -22,6 +22,7 @@
src/speex_resampler.h \
src/stack_alloc.h \
win32/unicode_support.h \
+ src/cpusupport.h \
src/wave_out.h \
src/wav_io.h
--- a/configure.ac
+++ b/configure.ac
@@ -192,15 +192,15 @@
if test "x$on_x86" = "xyes"; then
ac_enable_sse="no"
-AC_ARG_ENABLE(sse, AS_HELP_STRING([--enable-sse],[Build binaries that require SSE/SSE2]),
+AC_ARG_ENABLE(sse, AS_HELP_STRING([--enable-sse],[Build binaries that require SSE]),
[if test "$enableval" = yes; then
ac_enable_sse="yes"
saved_CFLAGS="$CFLAGS"
-CFLAGS="$CFLAGS -msse -msse2"
-AC_MSG_CHECKING([if ${CC} supports -msse -msse2])
+CFLAGS="$CFLAGS -msse"
+AC_MSG_CHECKING([if ${CC} supports -msse])
AC_LINK_IFELSE([AC_LANG_SOURCE([void main(void){char foo;}])],
[ AC_MSG_RESULT([yes])
- SSE="-msse -msse2"; ac_build_sse=yes ],
+ SSE="-msse"; ac_build_sse=yes ],
AC_MSG_RESULT([no]))
CFLAGS="$saved_CFLAGS $SSE"
fi])
--- /dev/null
+++ b/src/cpusupport.h
@@ -1,0 +1,64 @@
+/**
+ @file cpusupport.h
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OPUSTOOLS_CPUSUPPORT_H
+# define OPUSTOOLS_CPUSUPPORT_H
+
+/* query_cpu_support() returns nonzero when this binary was built to use
+   SSE instructions that the running CPU does not report: 1 when SSE is
+   missing, 2 when the build also uses SSE2 and SSE2 is missing.
+   It is a constant 0 when the build does not use SSE, or on x86-64
+   targets, where the query is disabled. */
+# if !defined(__SSE__) || defined(_M_X64) || defined(__amd64__)
+#  define query_cpu_support() 0
+# else
+#  if defined(_WIN32)
+#   include <intrin.h>
+#  else
+#   include <cpuid.h>
+#  endif
+static inline int query_cpu_support(void)
+{
+#  if defined(_WIN32)
+  int regs[4] = {0, 0, 0, 0};
+  unsigned int edx;
+  __cpuid(regs, 1);
+  edx = (unsigned int)regs[3];
+#  else
+  unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+  /* If the query fails edx stays 0, so the features count as missing. */
+  __get_cpuid(1, &eax, &ebx, &ecx, &edx);
+#  endif
+#  ifdef __SSE2__
+  if ((edx & (1u << 26)) == 0) /*SSE2*/
+    return 2;
+#  endif
+  return (edx & (1u << 25)) == 0; /*SSE*/
+}
+# endif
+#endif
--- a/src/opusdec.c
+++ b/src/opusdec.c
@@ -102,6 +102,7 @@
#include "diag_range.h"
#include "speex_resampler.h"
#include "stack_alloc.h"
+#include "cpusupport.h"
#define MINI(_a,_b) ((_a)<(_b)?(_a):(_b))
#define MAXI(_a,_b) ((_a)>(_b)?(_a):(_b))
@@ -665,7 +666,14 @@
#ifdef WIN_UNICODE
int argc_utf8;
char **argv_utf8;
+#endif
+ if(query_cpu_support()){ /* nonzero: built with SSE/SSE2 the CPU does not report */
+ fprintf(stderr,"\n\n** WARNING: This program was compiled with SSE%s\n",query_cpu_support()>1?"2":"");
+ fprintf(stderr," but this CPU claims to lack these instructions. **\n\n");
+ }
+
+#ifdef WIN_UNICODE
(void)argc;
(void)argv;
--- a/src/opusenc.c
+++ b/src/opusenc.c
@@ -67,6 +67,7 @@
#include "opus_header.h"
#include "opusenc.h"
#include "diag_range.h"
+#include "cpusupport.h"
#ifdef VALGRIND
#include <valgrind/memcheck.h>
@@ -275,7 +276,14 @@
#ifdef WIN_UNICODE
int argc_utf8;
char **argv_utf8;
+#endif
+ if(query_cpu_support()){ /* nonzero: built with SSE/SSE2 the CPU does not report */
+ fprintf(stderr,"\n\n** WARNING: This program was compiled with SSE%s\n",query_cpu_support()>1?"2":"");
+ fprintf(stderr," but this CPU claims to lack these instructions. **\n\n");
+ }
+
+#ifdef WIN_UNICODE
(void)argc;
(void)argv;
--- a/src/resample.c
+++ b/src/resample.c
@@ -98,7 +98,7 @@
#endif
#if defined(FLOATING_POINT) && defined(__SSE__)
-#include "resample_sse.h"
+# include "resample_sse.h"
#endif
/* Numer of elements to allocate on the stack */
--- a/src/resample_sse.h
+++ b/src/resample_sse.h
@@ -36,32 +36,12 @@
#include <xmmintrin.h>
-#if defined(_M_X64) || defined(__amd64__)
-#define query_cpu_support_sse() 1
-#else
-#include <intrin.h>
-
-static inline int query_cpu_support_sse(void)
-{
- static int initialized = 0;
- static int return_value;
- if (!initialized)
- {
- int buffer[4];
- __cpuid(buffer, 1);
- return_value = (buffer[3] & (1<<25)) != 0;
- initialized = 1;
- }
- return return_value;
-}
-#endif
-
#define OVERRIDE_INNER_PRODUCT_SINGLE
static inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
int i;
float ret;
- if (query_cpu_support_sse())
+ if (1)
{
__m128 sum = _mm_setzero_ps();
for (i=0;i<len;i+=8)
@@ -85,7 +65,7 @@
static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
int i;
float ret;
- if (query_cpu_support_sse())
+ if (1)
{
__m128 sum = _mm_setzero_ps();
__m128 f = _mm_loadu_ps(frac);
@@ -115,7 +95,7 @@
return ret;
}
-#ifdef _USE_SSE2
+#ifdef __SSE2__
#include <emmintrin.h>
#define OVERRIDE_INNER_PRODUCT_DOUBLE