ref: 80e1e1a33c3b1b64012ef24a6537176ea0cc7fd0
parent: 8ea3282ceaf9e2915d60310fc74fed6951e2b754
author: Chris Moeller <[email protected]>
date: Sat Jan 10 16:50:15 EST 2015
Overhauled resampler quite a bit, and implmented a new band-limited linear interpolation mode
--- a/dumb/include/dumb.h
+++ b/dumb/include/dumb.h
@@ -676,9 +676,10 @@
#define DUMB_RQ_ALIASING 0
#define DUMB_RQ_BLEP 1
#define DUMB_RQ_LINEAR 2
-#define DUMB_RQ_CUBIC 3
-#define DUMB_RQ_FIR 4
-#define DUMB_RQ_N_LEVELS 5
+#define DUMB_RQ_BLAM 3
+#define DUMB_RQ_CUBIC 4
+#define DUMB_RQ_FIR 5
+#define DUMB_RQ_N_LEVELS 6
extern int dumb_resampling_quality; /* This specifies the default */
void dumb_it_set_resampling_quality(DUMB_IT_SIGRENDERER * sigrenderer, int quality); /* This overrides it */
--- a/dumb/include/internal/resampler.h
+++ b/dumb/include/internal/resampler.h
@@ -36,9 +36,10 @@
RESAMPLER_QUALITY_ZOH = 0,
RESAMPLER_QUALITY_BLEP = 1,
RESAMPLER_QUALITY_LINEAR = 2,
- RESAMPLER_QUALITY_CUBIC = 3,
- RESAMPLER_QUALITY_SINC = 4,
- RESAMPLER_QUALITY_MAX = 4
+ RESAMPLER_QUALITY_BLAM = 3,
+ RESAMPLER_QUALITY_CUBIC = 4,
+ RESAMPLER_QUALITY_SINC = 5,
+ RESAMPLER_QUALITY_MAX = 5
};
void resampler_set_quality(void *, int quality);
@@ -52,6 +53,6 @@
int resampler_get_sample_count(void *);
int resampler_get_sample(void *);
float resampler_get_sample_float(void *);
-void resampler_remove_sample(void *);
+void resampler_remove_sample(void *, int decay);
#endif
--- a/dumb/src/helpers/resamp3.inc
+++ b/dumb/src/helpers/resamp3.inc
@@ -114,8 +114,12 @@
}
x = &src[pos*SRC_CHANNELS];
while ( todo ) {
- while ( resampler_get_free_count( resampler->fir_resampler[0] ) &&
- pos >= resampler->start )
+ while ( ( resampler_get_free_count( resampler->fir_resampler[0] ) ||
+ (!resampler_get_sample_count( resampler->fir_resampler[0] )
+#if SRC_CHANNELS == 2
+ && !resampler_get_sample_count( resampler->fir_resampler[1] )
+#endif
+ ) ) && pos >= resampler->start )
{
POKE_FIR(0);
pos--;
@@ -159,8 +163,12 @@
}
x = &src[pos*SRC_CHANNELS];
while ( todo ) {
- while ( resampler_get_free_count( resampler->fir_resampler[0] ) &&
- pos < resampler->end )
+ while ( ( resampler_get_free_count( resampler->fir_resampler[0] ) ||
+ (!resampler_get_sample_count( resampler->fir_resampler[0] )
+#if SRC_CHANNELS == 2
+ && !resampler_get_sample_count( resampler->fir_resampler[1] )
+#endif
+ ) ) && pos < resampler->end )
{
POKE_FIR(0);
pos++;
--- a/dumb/src/helpers/resample.c
+++ b/dumb/src/helpers/resample.c
@@ -74,13 +74,14 @@
* 0 - DUMB_RQ_ALIASING - fastest
* 1 - DUMB_RQ_BLEP - nicer than aliasing, but slower
* 2 - DUMB_RQ_LINEAR
- * 3 - DUMB_RQ_CUBIC
- * 4 - DUMB_RQ_FIR - nicest
+ * 3 - DUMB_RQ_BLAM - band-limited linear interpolation, nice but slower
+ * 4 - DUMB_RQ_CUBIC
+ * 5 - DUMB_RQ_FIR - nicest
*
* Values outside the range 0-4 will behave the same as the nearest
* value within the range.
*/
-int dumb_resampling_quality = DUMB_RQ_CUBIC;
+int dumb_resampling_quality = DUMB_RQ_BLAM;
--- a/dumb/src/helpers/resample.inc
+++ b/dumb/src/helpers/resample.inc
@@ -141,7 +141,7 @@
*dst++ += MULSC( resampler_get_sample( resampler->fir_resampler[0] ), vol ); \
UPDATE_VOLUME( volume, vol ); \
}
-#define ADVANCE_FIR resampler_remove_sample( resampler->fir_resampler[0] )
+#define ADVANCE_FIR resampler_remove_sample( resampler->fir_resampler[0], 1 )
#define STEREO_DEST_PEEK_FIR { \
int sample = resampler_get_sample( resampler->fir_resampler[0] ); \
*dst++ = MULSC( sample, lvol ); \
@@ -225,8 +225,8 @@
UPDATE_VOLUME( volume_right, rvol ); \
}
#define ADVANCE_FIR { \
- resampler_remove_sample( resampler->fir_resampler[0] ); \
- resampler_remove_sample( resampler->fir_resampler[1] ); \
+ resampler_remove_sample( resampler->fir_resampler[0], 1 ); \
+ resampler_remove_sample( resampler->fir_resampler[1], 1 ); \
}
#define STEREO_DEST_PEEK_FIR { \
*dst++ = MULSC( resampler_get_sample( resampler->fir_resampler[0] ), lvol ); \
--- a/dumb/src/helpers/resampler.c
+++ b/dumb/src/helpers/resampler.c
@@ -6,6 +6,13 @@
#include <xmmintrin.h>
#define RESAMPLER_SSE
#endif
+#ifdef __APPLE__
+#include <TargetConditionals.h>
+#if TARGET_CPU_ARM
+#include <arm_neon.h>
+#define RESAMPLER_NEON
+#endif
+#endif
#ifdef _MSC_VER
#define ALIGNED _declspec(align(16))
@@ -27,6 +34,10 @@
enum { SINC_SAMPLES = RESAMPLER_RESOLUTION * SINC_WIDTH };
enum { CUBIC_SAMPLES = RESAMPLER_RESOLUTION * 4 };
+static const float RESAMPLER_BLEP_CUTOFF = 0.90f;
+static const float RESAMPLER_BLAM_CUTOFF = 0.93f;
+static const float RESAMPLER_SINC_CUTOFF = 0.999f;
+
ALIGNED static float cubic_lut[CUBIC_SAMPLES];
static float sinc_lut[SINC_SAMPLES + 1];
@@ -129,10 +140,10 @@
{
int write_pos, write_filled;
int read_pos, read_filled;
- unsigned int phase;
- unsigned int phase_inc;
- unsigned int inv_phase;
- unsigned int inv_phase_inc;
+ float phase;
+ float phase_inc;
+ float inv_phase;
+ float inv_phase_inc;
unsigned char quality;
signed char delay_added;
signed char delay_removed;
@@ -173,25 +184,10 @@
void * resampler_dup(const void * _r)
{
- const resampler * r_in = ( const resampler * ) _r;
- resampler * r_out = ( resampler * ) malloc( sizeof(resampler) );
+ void * r_out = malloc( sizeof(resampler) );
if ( !r_out ) return 0;
- r_out->write_pos = r_in->write_pos;
- r_out->write_filled = r_in->write_filled;
- r_out->read_pos = r_in->read_pos;
- r_out->read_filled = r_in->read_filled;
- r_out->phase = r_in->phase;
- r_out->phase_inc = r_in->phase_inc;
- r_out->inv_phase = r_in->inv_phase;
- r_out->inv_phase_inc = r_in->inv_phase_inc;
- r_out->quality = r_in->quality;
- r_out->delay_added = r_in->delay_added;
- r_out->delay_removed = r_in->delay_removed;
- r_out->last_amp = r_in->last_amp;
- r_out->accumulator = r_in->accumulator;
- memcpy( r_out->buffer_in, r_in->buffer_in, sizeof(r_in->buffer_in) );
- memcpy( r_out->buffer_out, r_in->buffer_out, sizeof(r_in->buffer_out) );
+ resampler_dup_inplace(r_out, _r);
return r_out;
}
@@ -227,7 +223,8 @@
quality = RESAMPLER_QUALITY_MAX;
if ( r->quality != quality )
{
- if ( quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLEP )
+ if ( quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLEP ||
+ quality == RESAMPLER_QUALITY_BLAM || r->quality == RESAMPLER_QUALITY_BLAM )
{
r->read_pos = 0;
r->read_filled = 0;
@@ -257,6 +254,7 @@
return 1;
case RESAMPLER_QUALITY_LINEAR:
+ case RESAMPLER_QUALITY_BLAM:
return 2;
case RESAMPLER_QUALITY_CUBIC:
@@ -275,6 +273,7 @@
case RESAMPLER_QUALITY_ZOH:
case RESAMPLER_QUALITY_BLEP:
case RESAMPLER_QUALITY_LINEAR:
+ case RESAMPLER_QUALITY_BLAM:
return 0;
case RESAMPLER_QUALITY_CUBIC:
@@ -297,6 +296,7 @@
return 0;
case RESAMPLER_QUALITY_BLEP:
+ case RESAMPLER_QUALITY_BLAM:
return SINC_WIDTH - 1;
}
}
@@ -319,16 +319,19 @@
r->delay_removed = -1;
memset(r->buffer_in, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0]));
memset(r->buffer_in + resampler_buffer_size, 0, (SINC_WIDTH - 1) * sizeof(r->buffer_in[0]));
- if (r->quality == RESAMPLER_QUALITY_BLEP)
+ if (r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM)
+ {
+ r->inv_phase = 0;
memset(r->buffer_out, 0, sizeof(r->buffer_out));
+ }
}
void resampler_set_rate(void *_r, double new_factor)
{
resampler * r = ( resampler * ) _r;
- r->phase_inc = (int)( new_factor * RESAMPLER_RESOLUTION_EXTRA );
+ r->phase_inc = new_factor;
new_factor = 1.0 / new_factor;
- r->inv_phase_inc = (int)( new_factor * RESAMPLER_RESOLUTION_EXTRA );
+ r->inv_phase_inc = new_factor;
}
void resampler_write_sample(void *_r, short s)
@@ -390,8 +393,8 @@
float* out = *out_;
float const* in = in_;
float const* const in_end = in + in_size;
- int phase = r->phase;
- int phase_inc = r->phase_inc;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
do
{
@@ -405,9 +408,9 @@
phase += phase_inc;
- in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA);
+ in += (int)phase;
- phase &= RESAMPLER_RESOLUTION_EXTRA - 1;
+ phase = fmod(phase, 1.0f);
}
while ( in < in_end );
@@ -422,6 +425,7 @@
return used;
}
+#ifndef RESAMPLER_NEON
static int resampler_run_blep(resampler * r, float ** out_, float * out_end)
{
int in_size = r->write_filled;
@@ -434,38 +438,45 @@
float const* in = in_;
float const* const in_end = in + in_size;
float last_amp = r->last_amp;
- int inv_phase = r->inv_phase;
- int inv_phase_inc = r->inv_phase_inc;
+ float inv_phase = r->inv_phase;
+ float inv_phase_inc = r->inv_phase_inc;
- const int step = RESAMPLER_RESOLUTION;
+ const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION;
+ const int window_step = RESAMPLER_RESOLUTION;
do
{
- float kernel[SINC_WIDTH * 2], kernel_sum = 0.0;
- int phase_reduced = inv_phase >> RESAMPLER_SHIFT_EXTRA;
- int i = SINC_WIDTH;
float sample;
if ( out + SINC_WIDTH * 2 > out_end )
break;
- for (; i >= -SINC_WIDTH + 1; --i)
+ sample = *in++ - last_amp;
+
+ if (sample)
{
- int pos = i * step;
- int abs_pos = abs(phase_reduced - pos);
- kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs_pos] * window_lut[abs_pos];
+ float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f;
+ int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
+ int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+ int i = SINC_WIDTH;
+
+ for (; i >= -SINC_WIDTH + 1; --i)
+ {
+ int pos = i * step;
+ int window_pos = i * window_step;
+ kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+ }
+ last_amp += sample;
+ sample /= kernel_sum;
+ for (sample = 0, i = 0; i < SINC_WIDTH * 2; ++i)
+ out[i] += sample * kernel[i];
}
- sample = *in++ - last_amp;
- last_amp += sample;
- sample /= kernel_sum;
- for (sample = 0, i = 0; i < SINC_WIDTH * 2; ++i)
- out[i] += sample * kernel[i];
inv_phase += inv_phase_inc;
- out += inv_phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA);
+ out += (int)inv_phase;
- inv_phase &= RESAMPLER_RESOLUTION_EXTRA - 1;
+ inv_phase = fmod(inv_phase, 1.0f);
}
while ( in < in_end );
@@ -480,6 +491,7 @@
return used;
}
+#endif
#ifdef RESAMPLER_SSE
static int resampler_run_blep_sse(resampler * r, float ** out_, float * out_end)
@@ -494,50 +506,134 @@
float const* in = in_;
float const* const in_end = in + in_size;
float last_amp = r->last_amp;
- int inv_phase = r->inv_phase;
- int inv_phase_inc = r->inv_phase_inc;
+ float inv_phase = r->inv_phase;
+ float inv_phase_inc = r->inv_phase_inc;
- const int step = RESAMPLER_RESOLUTION;
+ const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION;
+ const int window_step = RESAMPLER_RESOLUTION;
do
{
- // accumulate in extended precision
- float kernel_sum = 0.0;
- __m128 kernel[SINC_WIDTH / 2];
- __m128 temp1, temp2;
- __m128 samplex;
float sample;
- float *kernelf = (float*)(&kernel);
- int phase_reduced = inv_phase >> RESAMPLER_SHIFT_EXTRA;
- int i = SINC_WIDTH;
if ( out + SINC_WIDTH * 2 > out_end )
break;
- for (; i >= -SINC_WIDTH + 1; --i)
+ sample = *in++ - last_amp;
+
+ if (sample)
{
- int pos = i * step;
- int abs_pos = abs(phase_reduced - pos);
- kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs_pos] * window_lut[abs_pos];
+ float kernel_sum = 0.0f;
+ __m128 kernel[SINC_WIDTH / 2];
+ __m128 temp1, temp2;
+ __m128 samplex;
+ float *kernelf = (float*)(&kernel);
+ int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
+ int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+ int i = SINC_WIDTH;
+
+ for (; i >= -SINC_WIDTH + 1; --i)
+ {
+ int pos = i * step;
+ int window_pos = i * window_step;
+ kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+ }
+ last_amp += sample;
+ sample /= kernel_sum;
+ samplex = _mm_set1_ps( sample );
+ for (i = 0; i < SINC_WIDTH / 2; ++i)
+ {
+ temp1 = _mm_load_ps( (const float *)( kernel + i ) );
+ temp1 = _mm_mul_ps( temp1, samplex );
+ temp2 = _mm_loadu_ps( (const float *) out + i * 4 );
+ temp1 = _mm_add_ps( temp1, temp2 );
+ _mm_storeu_ps( (float *) out + i * 4, temp1 );
+ }
}
+
+ inv_phase += inv_phase_inc;
+
+ out += (int)inv_phase;
+
+ inv_phase = fmod(inv_phase, 1.0f);
+ }
+ while ( in < in_end );
+
+ r->inv_phase = inv_phase;
+ r->last_amp = last_amp;
+ *out_ = out;
+
+ used = (int)(in - in_);
+
+ r->write_filled -= used;
+ }
+
+ return used;
+}
+#endif
+
+#ifdef RESAMPLER_NEON
+static int resampler_run_blep(resampler * r, float ** out_, float * out_end)
+{
+ int in_size = r->write_filled;
+ float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+ int used = 0;
+ in_size -= 1;
+ if ( in_size > 0 )
+ {
+ float* out = *out_;
+ float const* in = in_;
+ float const* const in_end = in + in_size;
+ float last_amp = r->last_amp;
+ float inv_phase = r->inv_phase;
+ float inv_phase_inc = r->inv_phase_inc;
+
+ const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION;
+ const int window_step = RESAMPLER_RESOLUTION;
+
+ do
+ {
+ float sample;
+
+ if ( out + SINC_WIDTH * 2 > out_end )
+ break;
+
sample = *in++ - last_amp;
- last_amp += sample;
- sample /= kernel_sum;
- samplex = _mm_set1_ps( sample );
- for (i = 0; i < SINC_WIDTH / 2; ++i)
+
+ if (sample)
{
- temp1 = _mm_load_ps( (const float *)( kernel + i ) );
- temp1 = _mm_mul_ps( temp1, samplex );
- temp2 = _mm_loadu_ps( (const float *) out + i * 4 );
- temp1 = _mm_add_ps( temp1, temp2 );
- _mm_storeu_ps( (float *) out + i * 4, temp1 );
+ float kernel_sum = 0.0f;
+ float32x4_t kernel[SINC_WIDTH / 2];
+ float32x4_t temp1, temp2;
+ float32x4_t samplex;
+ float *kernelf = (float*)(&kernel);
+ int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
+ int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+ int i = SINC_WIDTH;
+
+ for (; i >= -SINC_WIDTH + 1; --i)
+ {
+ int pos = i * step;
+ int window_pos = i * window_step;
+ kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+ }
+ last_amp += sample;
+ sample /= kernel_sum;
+ samplex = vdupq_n_f32(sample);
+ for (i = 0; i < SINC_WIDTH / 2; ++i)
+ {
+ temp1 = vld1q_f32( (const float32_t *)( kernel + i ) );
+ temp2 = vld1q_f32( (const float32_t *) out + i * 4 );
+ temp1 = vmlaq_f32( temp2, temp1, samplex );
+ vst1q_f32( (float32_t *) out + i * 4, temp1 );
+ }
}
inv_phase += inv_phase_inc;
- out += inv_phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA);
+ out += (int)inv_phase;
- inv_phase &= RESAMPLER_RESOLUTION_EXTRA - 1;
+ inv_phase = fmod(inv_phase, 1.0f);
}
while ( in < in_end );
@@ -565,8 +661,8 @@
float* out = *out_;
float const* in = in_;
float const* const in_end = in + in_size;
- int phase = r->phase;
- int phase_inc = r->phase_inc;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
do
{
@@ -575,14 +671,14 @@
if ( out >= out_end )
break;
- sample = in[0] + (in[1] - in[0]) * ((float)phase / RESAMPLER_RESOLUTION_EXTRA);
+ sample = in[0] + (in[1] - in[0]) * phase;
*out++ = sample;
phase += phase_inc;
- in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA);
+ in += (int)phase;
- phase &= RESAMPLER_RESOLUTION_EXTRA - 1;
+ phase = fmod(phase, 1.0f);
}
while ( in < in_end );
@@ -597,6 +693,287 @@
return used;
}
+#ifndef RESAMPLER_NEON
+static int resampler_run_blam(resampler * r, float ** out_, float * out_end)
+{
+ int in_size = r->write_filled;
+ float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+ int used = 0;
+ in_size -= 2;
+ if ( in_size > 0 )
+ {
+ float* out = *out_;
+ float const* in = in_;
+ float const* const in_end = in + in_size;
+ float last_amp = r->last_amp;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
+ float inv_phase = r->inv_phase;
+ float inv_phase_inc = r->inv_phase_inc;
+
+ const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION;
+ const int window_step = RESAMPLER_RESOLUTION;
+
+ do
+ {
+ float sample;
+
+ if ( out + SINC_WIDTH * 2 > out_end )
+ break;
+
+ sample = in[0];
+ if (phase_inc < 1.0f)
+ sample += (in[1] - in[0]) * phase;
+ sample -= last_amp;
+
+ if (sample)
+ {
+ float kernel[SINC_WIDTH * 2], kernel_sum = 0.0f;
+ int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
+ int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+ int i = SINC_WIDTH;
+
+ for (; i >= -SINC_WIDTH + 1; --i)
+ {
+ int pos = i * step;
+ int window_pos = i * window_step;
+ kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+ }
+ last_amp += sample;
+ sample /= kernel_sum;
+ for (sample = 0, i = 0; i < SINC_WIDTH * 2; ++i)
+ out[i] += sample * kernel[i];
+ }
+
+ if (inv_phase_inc < 1.0f)
+ {
+ ++in;
+ inv_phase += inv_phase_inc;
+ out += (int)inv_phase;
+ inv_phase = fmod(inv_phase, 1.0f);
+ }
+ else
+ {
+ phase += phase_inc;
+ ++out;
+ in += (int)phase;
+ phase = fmod(phase, 1.0f);
+ }
+ }
+ while ( in < in_end );
+
+ r->phase = phase;
+ r->inv_phase = inv_phase;
+ r->last_amp = last_amp;
+ *out_ = out;
+
+ used = (int)(in - in_);
+
+ r->write_filled -= used;
+ }
+
+ return used;
+}
+#endif
+
+#ifdef RESAMPLER_SSE
+static int resampler_run_blam_sse(resampler * r, float ** out_, float * out_end)
+{
+ int in_size = r->write_filled;
+ float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+ int used = 0;
+ in_size -= 2;
+ if ( in_size > 0 )
+ {
+ float* out = *out_;
+ float const* in = in_;
+ float const* const in_end = in + in_size;
+ float last_amp = r->last_amp;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
+ float inv_phase = r->inv_phase;
+ float inv_phase_inc = r->inv_phase_inc;
+
+ const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION;
+ const int window_step = RESAMPLER_RESOLUTION;
+
+ do
+ {
+ float sample;
+
+ if ( out + SINC_WIDTH * 2 > out_end )
+ break;
+
+ sample = in[0];
+ if (phase_inc < 1.0f)
+ {
+ sample += (in[1] - in[0]) * phase;
+ }
+ sample -= last_amp;
+
+ if (sample)
+ {
+ float kernel_sum = 0.0f;
+ __m128 kernel[SINC_WIDTH / 2];
+ __m128 temp1, temp2;
+ __m128 samplex;
+ float *kernelf = (float*)(&kernel);
+ int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
+ int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+ int i = SINC_WIDTH;
+
+ for (; i >= -SINC_WIDTH + 1; --i)
+ {
+ int pos = i * step;
+ int window_pos = i * window_step;
+ kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+ }
+ last_amp += sample;
+ sample /= kernel_sum;
+ samplex = _mm_set1_ps( sample );
+ for (i = 0; i < SINC_WIDTH / 2; ++i)
+ {
+ temp1 = _mm_load_ps( (const float *)( kernel + i ) );
+ temp1 = _mm_mul_ps( temp1, samplex );
+ temp2 = _mm_loadu_ps( (const float *) out + i * 4 );
+ temp1 = _mm_add_ps( temp1, temp2 );
+ _mm_storeu_ps( (float *) out + i * 4, temp1 );
+ }
+ }
+
+ if (inv_phase_inc < 1.0f)
+ {
+ ++in;
+ inv_phase += inv_phase_inc;
+ out += (int)inv_phase;
+ inv_phase = fmod(inv_phase, 1.0f);
+ }
+ else
+ {
+ phase += phase_inc;
+ ++out;
+
+ if (phase >= 1.0f)
+ {
+ ++in;
+ phase = fmod(phase, 1.0f);
+ }
+ }
+ }
+ while ( in < in_end );
+
+ r->phase = phase;
+ r->inv_phase = inv_phase;
+ r->last_amp = last_amp;
+ *out_ = out;
+
+ used = (int)(in - in_);
+
+ r->write_filled -= used;
+ }
+
+ return used;
+}
+#endif
+
+#ifdef RESAMPLER_NEON
+static int resampler_run_blam(resampler * r, float ** out_, float * out_end)
+{
+ int in_size = r->write_filled;
+ float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+ int used = 0;
+ in_size -= 2;
+ if ( in_size > 0 )
+ {
+ float* out = *out_;
+ float const* in = in_;
+ float const* const in_end = in + in_size;
+ float last_amp = r->last_amp;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
+ float inv_phase = r->inv_phase;
+ float inv_phase_inc = r->inv_phase_inc;
+
+ const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION;
+ const int window_step = RESAMPLER_RESOLUTION;
+
+ do
+ {
+ float sample;
+
+ if ( out + SINC_WIDTH * 2 > out_end )
+ break;
+
+ sample = in[0];
+ if (phase_inc < 1.0f)
+ sample += (in[1] - in[0]) * fphase;
+ sample -= last_amp;
+
+ if (sample)
+ {
+ float kernel_sum = 0.0;
+ float32x4_t kernel[SINC_WIDTH / 2];
+ float32x4_t temp1, temp2;
+ float32x4_t samplex;
+ float *kernelf = (float*)(&kernel);
+ int phase_reduced = (int)(inv_phase * RESAMPLER_RESOLUTION);
+ int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+ int i = SINC_WIDTH;
+
+ for (; i >= -SINC_WIDTH + 1; --i)
+ {
+ int pos = i * step;
+ int window_pos = i * window_step;
+ kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+ }
+ last_amp += sample;
+ sample /= kernel_sum;
+ samplex = vdupq_n_f32(sample);
+ for (i = 0; i < SINC_WIDTH / 2; ++i)
+ {
+ temp1 = vld1q_f32( (const float32_t *)( kernel + i ) );
+ temp2 = vld1q_f32( (const float32_t *) out + i * 4 );
+ temp1 = vmlaq_f32( temp2, temp1, samplex );
+ vst1q_f32( (float32_t *) out + i * 4, temp1 );
+ }
+ }
+
+ if (inv_phase_inc < 1.0f)
+ {
+ ++in;
+ inv_phase += inv_phase_inc;
+ out += (int)inv_phase;
+ inv_phase = fmod(inv_phase, 1.0f);
+ }
+ else
+ {
+ phase += phase_inc;
+ ++out;
+
+ if (phase >= 1.0f)
+ {
+ ++in;
+ phase = fmod(phase, 1.0f);
+ }
+ }
+ }
+ while ( in < in_end );
+
+ r->phase = phase;
+ r->inv_phase = inv_phase;
+ r->last_amp = last_amp;
+ *out_ = out;
+
+ used = (int)(in - in_);
+
+ r->write_filled -= used;
+ }
+
+ return used;
+}
+#endif
+
+#ifndef RESAMPLER_NEON
static int resampler_run_cubic(resampler * r, float ** out_, float * out_end)
{
int in_size = r->write_filled;
@@ -608,8 +985,8 @@
float* out = *out_;
float const* in = in_;
float const* const in_end = in + in_size;
- int phase = r->phase;
- int phase_inc = r->phase_inc;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
do
{
@@ -620,7 +997,7 @@
if ( out >= out_end )
break;
- kernel = cubic_lut + (phase >> RESAMPLER_SHIFT_EXTRA) * 4;
+ kernel = cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4;
for (sample = 0, i = 0; i < 4; ++i)
sample += in[i] * kernel[i];
@@ -628,9 +1005,9 @@
phase += phase_inc;
- in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA);
+ in += (int)phase;
- phase &= RESAMPLER_RESOLUTION_EXTRA - 1;
+ phase = fmod(phase, 1.0f);
}
while ( in < in_end );
@@ -644,6 +1021,7 @@
return used;
}
+#endif
#ifdef RESAMPLER_SSE
static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end)
@@ -657,8 +1035,8 @@
float* out = *out_;
float const* in = in_;
float const* const in_end = in + in_size;
- int phase = r->phase;
- int phase_inc = r->phase_inc;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
do
{
@@ -669,7 +1047,7 @@
break;
temp1 = _mm_loadu_ps( (const float *)( in ) );
- temp2 = _mm_load_ps( (const float *)( cubic_lut + (phase >> RESAMPLER_SHIFT_EXTRA) * 4 ) );
+ temp2 = _mm_load_ps( (const float *)( cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4 ) );
temp1 = _mm_mul_ps( temp1, temp2 );
samplex = _mm_add_ps( samplex, temp1 );
temp1 = _mm_movehl_ps( temp1, samplex );
@@ -682,9 +1060,9 @@
phase += phase_inc;
- in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA);
+ in += (int)phase;
- phase &= RESAMPLER_RESOLUTION_EXTRA - 1;
+ phase = fmod(phase, 1.0f);
}
while ( in < in_end );
@@ -700,6 +1078,56 @@
}
#endif
+#ifdef RESAMPLER_NEON
+static int resampler_run_cubic(resampler * r, float ** out_, float * out_end)
+{
+ int in_size = r->write_filled;
+ float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+ int used = 0;
+ in_size -= 4;
+ if ( in_size > 0 )
+ {
+ float* out = *out_;
+ float const* in = in_;
+ float const* const in_end = in + in_size;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
+
+ do
+ {
+ float32x4_t temp1, temp2;
+ float32x2_t half;
+
+ if ( out >= out_end )
+ break;
+
+ temp1 = vld1q_f32( (const float32_t *)( in ) );
+ temp2 = vld1q_f32( (const float32_t *)( cubic_lut + (int)(phase * RESAMPLER_RESOLUTION) * 4 ) );
+ temp1 = vmulq_f32( temp1, temp2 );
+ half = vadd_f32(vget_high_f32(temp1), vget_low_f32(temp1));
+ *out++ = vget_lane_f32(vpadd_f32(half, half), 0);
+
+ phase += phase_inc;
+
+ in += (int)phase;
+
+ phase = fmod(phase, 1.0f);
+ }
+ while ( in < in_end );
+
+ r->phase = phase;
+ *out_ = out;
+
+ used = (int)(in - in_);
+
+ r->write_filled -= used;
+ }
+
+ return used;
+}
+#endif
+
+#ifndef RESAMPLER_NEON
static int resampler_run_sinc(resampler * r, float ** out_, float * out_end)
{
int in_size = r->write_filled;
@@ -711,10 +1139,10 @@
float* out = *out_;
float const* in = in_;
float const* const in_end = in + in_size;
- int phase = r->phase;
- int phase_inc = r->phase_inc;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
- int step = phase_inc > RESAMPLER_RESOLUTION_EXTRA ? RESAMPLER_RESOLUTION * RESAMPLER_RESOLUTION_EXTRA / phase_inc : RESAMPLER_RESOLUTION;
+ int step = phase_inc > 1.0f ? (int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF);
int window_step = RESAMPLER_RESOLUTION;
do
@@ -721,7 +1149,7 @@
{
float kernel[SINC_WIDTH * 2], kernel_sum = 0.0;
int i = SINC_WIDTH;
- int phase_reduced = phase >> RESAMPLER_SHIFT_EXTRA;
+ int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION);
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
float sample;
@@ -740,9 +1168,9 @@
phase += phase_inc;
- in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA);
+ in += (int)phase;
- phase &= RESAMPLER_RESOLUTION_EXTRA - 1;
+ phase = fmod(phase, 1.0f);
}
while ( in < in_end );
@@ -756,6 +1184,7 @@
return used;
}
+#endif
#ifdef RESAMPLER_SSE
static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end)
@@ -769,10 +1198,10 @@
float* out = *out_;
float const* in = in_;
float const* const in_end = in + in_size;
- int phase = r->phase;
- int phase_inc = r->phase_inc;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
- int step = phase_inc > RESAMPLER_RESOLUTION_EXTRA ? RESAMPLER_RESOLUTION * RESAMPLER_RESOLUTION_EXTRA / phase_inc : RESAMPLER_RESOLUTION;
+ int step = phase_inc > 1.0f ? (int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF);
int window_step = RESAMPLER_RESOLUTION;
do
@@ -784,7 +1213,7 @@
__m128 samplex = _mm_setzero_ps();
float *kernelf = (float*)(&kernel);
int i = SINC_WIDTH;
- int phase_reduced = phase >> RESAMPLER_SHIFT_EXTRA;
+ int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION);
int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
if ( out >= out_end )
@@ -816,9 +1245,9 @@
phase += phase_inc;
- in += phase >> (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA);
+ in += (int)phase;
- phase &= RESAMPLER_RESOLUTION_EXTRA - 1;
+ phase = fmod(phase, 1.0f);
}
while ( in < in_end );
@@ -834,6 +1263,77 @@
}
#endif
+#ifdef RESAMPLER_NEON
+static int resampler_run_sinc(resampler * r, float ** out_, float * out_end)
+{
+ int in_size = r->write_filled;
+ float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+ int used = 0;
+ in_size -= SINC_WIDTH * 2;
+ if ( in_size > 0 )
+ {
+ float* out = *out_;
+ float const* in = in_;
+ float const* const in_end = in + in_size;
+ float phase = r->phase;
+ float phase_inc = r->phase_inc;
+
+ int step = phase_inc > 1.0f ? (int)(RESAMPLER_RESOLUTION / phase_inc * RESAMPLER_SINC_CUTOFF) : (int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF);
+ int window_step = RESAMPLER_RESOLUTION;
+
+ do
+ {
+ // accumulate in extended precision
+ float kernel_sum = 0.0;
+ float32x4_t kernel[SINC_WIDTH / 2];
+ float32x4_t temp1, temp2;
+ float32x4_t samplex = {0};
+ float32x2_t half;
+ float *kernelf = (float*)(&kernel);
+ int i = SINC_WIDTH;
+ int phase_reduced = (int)(phase * RESAMPLER_RESOLUTION);
+ int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+
+ if ( out >= out_end )
+ break;
+
+ for (; i >= -SINC_WIDTH + 1; --i)
+ {
+ int pos = i * step;
+ int window_pos = i * window_step;
+ kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+ }
+ for (i = 0; i < SINC_WIDTH / 2; ++i)
+ {
+ temp1 = vld1q_f32( (const float32_t *)( in + i * 4 ) );
+ temp2 = vld1q_f32( (const float32_t *)( kernel + i ) );
+ samplex = vmlaq_f32( samplex, temp1, temp2 );
+ }
+ kernel_sum = 1.0 / kernel_sum;
+ samplex = vmulq_f32(samplex, vmovq_n_f32(kernel_sum));
+ half = vadd_f32(vget_high_f32(samplex), vget_low_f32(samplex));
+ *out++ = vget_lane_f32(vpadd_f32(half, half), 0);
+
+ phase += phase_inc;
+
+ in += (int)phase;
+
+ phase = fmod(phase, 1.0f);
+ }
+ while ( in < in_end );
+
+ r->phase = phase;
+ *out_ = out;
+
+ used = (int)(in - in_);
+
+ r->write_filled -= used;
+ }
+
+ return used;
+}
+#endif
+
static void resampler_fill(resampler * r)
{
int min_filled = resampler_min_filled(r);
@@ -861,9 +1361,11 @@
if ( write_extra > SINC_WIDTH * 2 - 1 )
write_extra = SINC_WIDTH * 2 - 1;
memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) );
+#ifdef RESAMPLER_SSE
if ( resampler_has_sse )
used = resampler_run_blep_sse( r, &out, out + write_size + write_extra );
else
+#endif
used = resampler_run_blep( r, &out, out + write_size + write_extra );
memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) );
if (!used)
@@ -875,6 +1377,27 @@
resampler_run_linear( r, &out, out + write_size );
break;
+ case RESAMPLER_QUALITY_BLAM:
+ {
+ float * out_ = out;
+ int write_extra = 0;
+ if ( write_pos >= r->read_pos )
+ write_extra = r->read_pos;
+ if ( write_extra > SINC_WIDTH * 2 - 1 )
+ write_extra = SINC_WIDTH * 2 - 1;
+ memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) );
+#ifdef RESAMPLER_SSE
+ if ( resampler_has_sse )
+ resampler_run_blam_sse( r, &out, out + write_size + write_extra );
+ else
+#endif
+ resampler_run_blam( r, &out, out + write_size + write_extra );
+ memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) );
+ if ( out == out_ )
+ return;
+ break;
+ }
+
case RESAMPLER_QUALITY_CUBIC:
#ifdef RESAMPLER_SSE
if ( resampler_has_sse )
@@ -905,7 +1428,7 @@
int delay = resampler_output_delay( r );
r->delay_removed = 0;
while ( delay-- )
- resampler_remove_sample( r );
+ resampler_remove_sample( r, 1 );
}
}
@@ -912,7 +1435,7 @@
int resampler_get_sample_count(void *_r)
{
resampler * r = ( resampler * ) _r;
- if ( r->read_filled < 1 && (r->quality != RESAMPLER_QUALITY_BLEP || r->inv_phase_inc))
+ if ( r->read_filled < 1 && ((r->quality != RESAMPLER_QUALITY_BLEP && r->quality != RESAMPLER_QUALITY_BLAM) || r->inv_phase_inc))
resampler_fill_and_remove_delay( r );
return r->read_filled;
}
@@ -924,7 +1447,7 @@
resampler_fill_and_remove_delay( r );
if ( r->read_filled < 1 )
return 0;
- if ( r->quality == RESAMPLER_QUALITY_BLEP )
+ if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM )
return (int)(r->buffer_out[ r->read_pos ] + r->accumulator);
else
return (int)r->buffer_out[ r->read_pos ];
@@ -937,24 +1460,27 @@
resampler_fill_and_remove_delay( r );
if ( r->read_filled < 1 )
return 0;
- if ( r->quality == RESAMPLER_QUALITY_BLEP )
+ if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM )
return r->buffer_out[ r->read_pos ] + r->accumulator;
else
return r->buffer_out[ r->read_pos ];
}
-void resampler_remove_sample(void *_r)
+void resampler_remove_sample(void *_r, int decay)
{
resampler * r = ( resampler * ) _r;
if ( r->read_filled > 0 )
{
- if ( r->quality == RESAMPLER_QUALITY_BLEP )
+ if ( r->quality == RESAMPLER_QUALITY_BLEP || r->quality == RESAMPLER_QUALITY_BLAM )
{
r->accumulator += r->buffer_out[ r->read_pos ];
r->buffer_out[ r->read_pos ] = 0;
- r->accumulator -= r->accumulator * (1.0 / 8192.0);
- if (fabs(r->accumulator) < 1e-20)
- r->accumulator = 0;
+ if (decay)
+ {
+ r->accumulator -= r->accumulator * (1.0f / 8192.0f);
+ if (fabs(r->accumulator) < 1e-20f)
+ r->accumulator = 0;
+ }
}
--r->read_filled;
r->read_pos = ( r->read_pos + 1 ) % resampler_buffer_size;