shithub: mp3dec

Download patch

ref: f912b8420f1ee8b5617bd155c6faa8f870375769
parent: 19b619acde594e882412c25420169fabdc256120
author: Chris Robinson <[email protected]>
date: Wed Aug 22 00:12:38 EDT 2018

Add float output support with the MINIMP3_FLOAT_OUTPUT macro

When defined, mp3dec_decode_frame outputs float samples instead of shorts:
each sample is scaled by 1/32768, with no clamping or 16-bit quantization.
The new function mp3dec_f32_to_s16 is also declared, to help convert float
samples to signed 16-bit.

--- a/minimp3.h
+++ b/minimp3.h
@@ -26,12 +26,16 @@
 #endif
 
 void mp3dec_init(mp3dec_t *dec);
+#ifndef MINIMP3_FLOAT_OUTPUT
 int mp3dec_decode_frame(mp3dec_t *dec, const unsigned char *mp3, int mp3_bytes, short *pcm, mp3dec_frame_info_t *info);
+#else
+int mp3dec_decode_frame(mp3dec_t *dec, const unsigned char *mp3, int mp3_bytes, float *pcm, mp3dec_frame_info_t *info);
+void mp3dec_f32_to_s16(const float *in, short *out, int num_samples);
+#endif
 
 #ifdef __cplusplus
 }
 #endif
-#endif /*MINIMP3_H*/
 
 #ifdef MINIMP3_IMPLEMENTATION
 
@@ -1365,19 +1369,28 @@
 #endif
 }
 
+#ifndef MINIMP3_FLOAT_OUTPUT
+typedef short mp3d_sample_t;
+
 static short mp3d_scale_pcm(float sample)
 {
-    if (sample >  32767.0) return (short) 32767;
-    if (sample < -32768.0) return (short)-32768;
-    int s = (int)(sample + .5f);
+    if (sample >=  32766.5) return (short) 32767;
+    if (sample <= -32767.5) return (short)-32768;
+    short s = (short)(sample + .5f);
     s -= (s < 0);   /* away from zero, to be compliant */
-    if (s >  32767) return (short) 32767;
-    if (s < -32768) return (short)-32768;
-    return (short)s;
+    return s;
 }
+#else
+typedef float mp3d_sample_t;
 
-static void mp3d_synth_pair(short *pcm, int nch, const float *z)
+static float mp3d_scale_pcm(float sample)
 {
+    return sample / 32768.0f;
+}
+#endif
+
+static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z)
+{
     float a;
     a  = (z[14*64] - z[    0]) * 29;
     a += (z[ 1*64] + z[13*64]) * 213;
@@ -1401,11 +1414,11 @@
     pcm[16*nch] = mp3d_scale_pcm(a);
 }
 
-static void mp3d_synth(float *xl, short *dstl, int nch, float *lins)
+static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)
 {
     int i;
     float *xr = xl + 576*(nch - 1);
-    short *dstr = dstl + (nch - 1);
+    mp3d_sample_t *dstr = dstl + (nch - 1);
 
     static const float g_win[] = {
         -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
@@ -1462,6 +1475,7 @@
         V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)
 
         {
+#ifndef MINIMP3_FLOAT_OUTPUT
 #if HAVE_SSE
             static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
             static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
@@ -1490,6 +1504,32 @@
             vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);
             vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
 #endif
+
+#else
+
+            static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
+            a = VMUL(a, g_scale);
+            b = VMUL(b, g_scale);
+#if HAVE_SSE
+            _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
+            _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
+            _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
+            _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
+            _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
+            _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
+            _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
+            _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
+#else
+            vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
+            vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
+            vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
+            vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
+            vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
+            vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
+            vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
+            vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
+#endif
+#endif /* MINIMP3_FLOAT_OUTPUT */
         }
     } else
 #endif
@@ -1527,7 +1567,7 @@
 #endif
 }
 
-static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, short *pcm, float *lins)
+static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins)
 {
     int i;
     for (i = 0; i < nch; i++)
@@ -1611,7 +1651,7 @@
     dec->header[0] = 0;
 }
 
-int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, short *pcm, mp3dec_frame_info_t *info)
+int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info)
 {
     int i = 0, igr, frame_size = 0, success = 1;
     const uint8_t *hdr;
@@ -1704,4 +1744,68 @@
     }
     return success*hdr_frame_samples(dec->header);
 }
+
+#ifdef MINIMP3_FLOAT_OUTPUT
+void mp3dec_f32_to_s16(const float *in, short *out, int num_samples)
+{
+    if(num_samples > 0)
+    {
+        int i = 0;
+#if HAVE_SIMD
+        int aligned_count = num_samples & ~7;
+
+        for(;i < aligned_count;i+=8)
+        {
+            static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
+            f4 a = VMUL(VLD(&in[i  ]), g_scale);
+            f4 b = VMUL(VLD(&in[i+4]), g_scale);
+#if HAVE_SSE
+            static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
+            static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
+            __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
+                                           _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
+            out[i  ] = _mm_extract_epi16(pcm8, 0);
+            out[i+1] = _mm_extract_epi16(pcm8, 1);
+            out[i+2] = _mm_extract_epi16(pcm8, 2);
+            out[i+3] = _mm_extract_epi16(pcm8, 3);
+            out[i+4] = _mm_extract_epi16(pcm8, 4);
+            out[i+5] = _mm_extract_epi16(pcm8, 5);
+            out[i+6] = _mm_extract_epi16(pcm8, 6);
+            out[i+7] = _mm_extract_epi16(pcm8, 7);
+#else
+            int16x4_t pcma, pcmb;
+            a = VADD(a, VSET(0.5f));
+            b = VADD(b, VSET(0.5f));
+            pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
+            pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
+            vst1_lane_s16(out+i  , pcma, 0);
+            vst1_lane_s16(out+i+1, pcma, 1);
+            vst1_lane_s16(out+i+2, pcma, 2);
+            vst1_lane_s16(out+i+3, pcma, 3);
+            vst1_lane_s16(out+i+4, pcmb, 0);
+            vst1_lane_s16(out+i+5, pcmb, 1);
+            vst1_lane_s16(out+i+6, pcmb, 2);
+            vst1_lane_s16(out+i+7, pcmb, 3);
+#endif
+        }
+#endif
+        for(;i < num_samples;i++)
+        {
+            float sample = in[i] * 32768.0f;
+            if(sample >=  32766.5)
+                out[i] = (short) 32767;
+            else if (sample <= -32767.5)
+                out[i] = (short)-32768;
+            else
+            {
+                short s = (short)(sample + .5f);
+                s -= (s < 0);   /* away from zero, to be compliant */
+                out[i] = s;
+            }
+        }
+    }
+}
+#endif
+
 #endif /*MINIMP3_IMPLEMENTATION*/
+#endif /*MINIMP3_H*/