ref: 2fd4f163a35456f41c2371236c3ad34f170fbad2
parent: d8d3506de6946fd0cea29f0fc6e69b45f024a811
author: lieff <[email protected]>
date: Thu Feb 15 07:52:32 EST 2018
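optimize NEON code: drop the explicit float clamp and use saturating vqaddq_s32 before vqmovn_s32 (g_max/g_min become SSE-only)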
--- a/minimp3.h
+++ b/minimp3.h
@@ -1484,9 +1484,9 @@
V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)
{
+#if HAVE_SSE
static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
-#if HAVE_SSE
__m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
dstr[(15 - i)*nch] = _mm_extract_epi16(pcm8, 1);
@@ -1501,10 +1501,8 @@
int16x4_t pcma, pcmb;
a = VADD(a, VSET(0.5f));
b = VADD(b, VSET(0.5f));
- a = vmaxq_f32(vminq_f32(a, g_max), g_min);
- b = vmaxq_f32(vminq_f32(b, g_max), g_min);
- pcma = vqmovn_s32(vaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
- pcmb = vqmovn_s32(vaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
+ pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
+ pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1);
vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1);
vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0);