ref: 31703a27d9c753d0fe8b8aa039a2463742e6df53
dir: /libfaad/cfft.c/
/* ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by ** the Free Software Foundation; either version 2 of the License, or ** (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ** ** Any non-GPL usage of this software or parts of this software is strictly ** forbidden. ** ** Commercial non-GPL licensing of this software is possible. ** For more info contact Ahead Software through [email protected]. ** ** $Id: cfft.c,v 1.24 2004/01/05 14:47:27 menno Exp $ **/ /* * Algorithmically based on Fortran-77 FFTPACK * by Paul N. Swarztrauber(Version 4, 1985). * * Does even sized fft only */ /* isign is +1 for backward and -1 for forward transforms */ #include "common.h" #include "structs.h" #include <stdlib.h> #include "cfft.h" #include "cfft_tab.h" /* static function declarations */ #ifdef USE_SSE static void passf2pos_sse(const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa); static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa); static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); #endif static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa); static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa); static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const int8_t isign); static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); static void passf4neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); static void passf5(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3, const complex_t *wa4, const int8_t isign); INLINE void cfftf1(uint16_t n, complex_t *c, complex_t *ch, const uint16_t *ifac, const complex_t *wa, const int8_t isign); static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac); /*---------------------------------------------------------------------- passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd. ----------------------------------------------------------------------*/ #if 0 //def USE_SSE static void passf2pos_sse(const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa) { uint16_t k, ah, ac; for (k = 0; k < l1; k++) { ah = 2*k; ac = 4*k; RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); } } static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa) { uint16_t i, k, ah, ac; for (k = 0; k < l1; k++) { ah = k*ido; ac = 2*k*ido; for (i = 0; i < ido; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14; __m128 m15, m16, m17, m18, m19, m20, m21, m22, m23, m24; __m128 w1, w2, w3, w4; m1 = _mm_load_ps(&RE(cc[ac+i])); m2 = _mm_load_ps(&RE(cc[ac+ido+i])); m5 = _mm_load_ps(&RE(cc[ac+i+2])); m6 = _mm_load_ps(&RE(cc[ac+ido+i+2])); w1 = _mm_load_ps(&RE(wa[i])); w3 = _mm_load_ps(&RE(wa[i+2])); m3 = _mm_add_ps(m1, m2); m15 = _mm_add_ps(m5, m6); m4 = _mm_sub_ps(m1, m2); m16 = _mm_sub_ps(m5, m6); _mm_store_ps(&RE(ch[ah+i]), m3); _mm_store_ps(&RE(ch[ah+i+2]), m15); w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); m7 = _mm_mul_ps(m4, w1); m17 = _mm_mul_ps(m16, w3); m8 = _mm_mul_ps(m4, w2); m18 = _mm_mul_ps(m16, w4); m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); m19 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(2, 0, 2, 0)); m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); m20 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(3, 1, 3, 1)); m11 = _mm_add_ps(m9, m10); m21 = _mm_add_ps(m19, m20); m12 = _mm_sub_ps(m9, m10); m22 = _mm_sub_ps(m19, m20); m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2)); m23 = _mm_shuffle_ps(m21, m21, _MM_SHUFFLE(0, 0, 3, 2)); m14 = _mm_unpacklo_ps(m12, m13); m24 = _mm_unpacklo_ps(m22, m23); _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14); _mm_store_ps(&RE(ch[ah+i+2+l1*ido]), m24); } } } #endif static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa) { uint16_t i, k, ah, ac; if (ido == 1) { for (k = 0; k < l1; k++) { ah = 2*k; ac = 4*k; RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); } } else { for (k = 0; k < l1; k++) { ah = k*ido; ac = 2*k*ido; for (i = 0; i < ido; i++) { complex_t t2; RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]); RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]); IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]); IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]); #if 1 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), IM(t2), RE(t2), RE(wa[i]), IM(wa[i])); #else ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), RE(t2), IM(t2), RE(wa[i]), IM(wa[i])); #endif } } } } static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa) { uint16_t i, k, ah, ac; if (ido == 1) { for (k = 0; k < l1; k++) { ah = 2*k; ac = 4*k; RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); } } else { for (k = 0; k < l1; k++) { ah = k*ido; ac = 2*k*ido; for (i = 0; i < ido; i++) { complex_t t2; RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]); RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]); IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]); IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]); #if 1 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), RE(t2), IM(t2), RE(wa[i]), IM(wa[i])); #else ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), IM(t2), RE(t2), RE(wa[i]), IM(wa[i])); #endif } } } } static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const int8_t isign) { static real_t taur = FRAC_CONST(-0.5); static real_t taui = FRAC_CONST(0.866025403784439); uint16_t i, k, ac, ah; complex_t c2, c3, d2, d3, t2; if (ido == 1) { if (isign == 1) { for (k = 0; k < l1; k++) { ac = 3*k+1; ah = k; RE(t2) = RE(cc[ac]) + RE(cc[ac+1]); IM(t2) = IM(cc[ac]) + IM(cc[ac+1]); RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),taur); IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),taur); RE(ch[ah]) = RE(cc[ac-1]) + RE(t2); IM(ch[ah]) = IM(cc[ac-1]) + IM(t2); RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+1])), taui); IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+1])), taui); RE(ch[ah+l1]) = RE(c2) - IM(c3); IM(ch[ah+l1]) = IM(c2) + RE(c3); RE(ch[ah+2*l1]) = RE(c2) + IM(c3); IM(ch[ah+2*l1]) = IM(c2) - RE(c3); } } else { for (k = 0; k < l1; k++) { ac = 3*k+1; ah = k; RE(t2) = RE(cc[ac]) + RE(cc[ac+1]); IM(t2) = IM(cc[ac]) + IM(cc[ac+1]); RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),taur); IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),taur); RE(ch[ah]) = RE(cc[ac-1]) + RE(t2); IM(ch[ah]) = IM(cc[ac-1]) + IM(t2); RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+1])), taui); IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+1])), taui); RE(ch[ah+l1]) = RE(c2) + IM(c3); IM(ch[ah+l1]) = IM(c2) - RE(c3); RE(ch[ah+2*l1]) = RE(c2) - IM(c3); IM(ch[ah+2*l1]) = IM(c2) + RE(c3); } } } else { if (isign == 1) { for (k = 0; k < l1; k++) { for (i = 0; i < ido; i++) { ac = i + (3*k+1)*ido; ah = i + k * ido; RE(t2) = RE(cc[ac]) + RE(cc[ac+ido]); RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),taur); IM(t2) = IM(cc[ac]) + IM(cc[ac+ido]); IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),taur); RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2); IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2); RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+ido])), taui); IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+ido])), taui); RE(d2) = RE(c2) - IM(c3); IM(d3) = IM(c2) - RE(c3); RE(d3) = RE(c2) + IM(c3); IM(d2) = IM(c2) + RE(c3); #if 1 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); #else ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); #endif } } } else { for (k = 0; k < l1; k++) { for (i = 0; i < ido; i++) { ac = i + (3*k+1)*ido; ah = i + k * ido; RE(t2) = RE(cc[ac]) + RE(cc[ac+ido]); RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),taur); IM(t2) = IM(cc[ac]) + IM(cc[ac+ido]); IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),taur); RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2); IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2); RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+ido])), taui); IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+ido])), taui); RE(d2) = RE(c2) + IM(c3); IM(d3) = IM(c2) + RE(c3); RE(d3) = RE(c2) - IM(c3); IM(d2) = IM(c2) - RE(c3); #if 1 ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); #else ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); #endif } } } } } #ifdef USE_SSE ALIGN static const int32_t negate[4] = { 0x0, 0x0, 0x0, 0x80000000 }; __declspec(naked) static void passf4pos_sse(const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3) { __asm { push ebx mov ebx, esp and esp, -16 push edi push esi sub esp, 8 movzx edi, WORD PTR [ebx+8] movaps xmm1, XMMWORD PTR negate test edi, edi jle l1_is_zero lea esi, DWORD PTR [edi+edi] add esi, esi sub esi, edi add esi, esi add esi, esi add esi, esi mov eax, DWORD PTR [ebx+16] add esi, eax lea ecx, DWORD PTR [edi+edi] add ecx, ecx add ecx, ecx add ecx, ecx add ecx, eax lea edx, DWORD PTR [edi+edi] add edx, edx add edx, edx add edx, eax xor eax, eax mov DWORD PTR [esp], ebp mov ebp, DWORD PTR [ebx+12] fftloop: lea edi, DWORD PTR [eax+eax] add edi, edi movaps xmm2, XMMWORD PTR [ebp+edi*8] movaps xmm0, XMMWORD PTR [ebp+edi*8+16] movaps xmm7, XMMWORD PTR [ebp+edi*8+32] movaps xmm5, XMMWORD PTR [ebp+edi*8+48] movaps xmm6, xmm2 addps xmm6, xmm0 movaps xmm4, xmm1 xorps xmm4, xmm7 movaps xmm3, xmm1 xorps xmm3, xmm5 xorps xmm2, xmm1 xorps xmm0, xmm1 addps xmm7, xmm5 subps xmm2, xmm0 movaps xmm0, xmm6 shufps xmm0, xmm7, 68 subps xmm4, xmm3 shufps xmm6, xmm7, 238 movaps xmm5, xmm2 shufps xmm5, xmm4, 68 movaps xmm3, xmm0 addps xmm3, xmm6 shufps xmm2, xmm4, 187 subps xmm0, xmm6 movaps xmm4, xmm5 addps xmm4, xmm2 mov edi, DWORD PTR [ebx+16] movaps XMMWORD PTR [edi+eax*8], xmm3 subps xmm5, xmm2 movaps XMMWORD PTR [edx+eax*8], xmm4 movaps XMMWORD PTR [ecx+eax*8], xmm0 movaps XMMWORD PTR [esi+eax*8], xmm5 add eax, 2 movzx eax, ax movzx edi, WORD PTR [ebx+8] cmp eax, edi jl fftloop mov ebp, DWORD PTR [esp] l1_is_zero: add esp, 8 pop esi pop edi mov esp, ebx pop ebx ret } } #endif #if 0 static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3) { uint16_t i, k, ac, ah; for (k = 0; k < l1; k++) { ac = 4*k*ido; ah = k*ido; for (i = 0; i < ido; i+=2) { __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15, m16; __m128 n1, n2, n3, n4, n5, n6, n7, n8, n9, m17, m18, m19, m20, m21, m22, m23; __m128 w1, w2, w3, w4, w5, w6, m24, m25, m26, m27, m28, m29, m30; __m128 neg1 = _mm_set_ps(-1.0, 1.0, -1.0, 1.0); m1 = _mm_load_ps(&RE(cc[ac+i])); m2 = _mm_load_ps(&RE(cc[ac+i+2*ido])); m3 = _mm_add_ps(m1, m2); m4 = _mm_sub_ps(m1, m2); n1 = _mm_load_ps(&RE(cc[ac+i+ido])); n2 = _mm_load_ps(&RE(cc[ac+i+3*ido])); n3 = _mm_add_ps(n1, n2); n4 = _mm_mul_ps(neg1, n1); n5 = _mm_mul_ps(neg1, n2); n6 = _mm_sub_ps(n4, n5); m5 = _mm_add_ps(m3, n3); n7 = _mm_shuffle_ps(n6, n6, _MM_SHUFFLE(2, 3, 0, 1)); n8 = _mm_add_ps(m4, n7); m6 = _mm_sub_ps(m3, n3); n9 = _mm_sub_ps(m4, n7); _mm_store_ps(&RE(ch[ah+i]), m5); #if 0 static INLINE void ComplexMult(real_t *y1, real_t *y2, real_t x1, real_t x2, real_t c1, real_t c2) { *y1 = MUL_F(x1, c1) + MUL_F(x2, c2); *y2 = MUL_F(x2, c1) - MUL_F(x1, c2); } m7.0 = RE(c2)*RE(wa1[i]) m7.1 = IM(c2)*IM(wa1[i]) m7.2 = RE(c6)*RE(wa1[i+1]) m7.3 = IM(c6)*IM(wa1[i+1]) m8.0 = RE(c2)*IM(wa1[i]) m8.1 = IM(c2)*RE(wa1[i]) m8.2 = RE(c6)*IM(wa1[i+1]) m8.3 = IM(c6)*RE(wa1[i+1]) RE(0) = m7.0 - m7.1 IM(0) = m8.0 + m8.1 RE(1) = m7.2 - m7.3 IM(1) = m8.2 + m8.3 //// RE(0) = RE(c2)*RE(wa1[i]) - IM(c2)*IM(wa1[i]) IM(0) = RE(c2)*IM(wa1[i]) + IM(c2)*RE(wa1[i]) RE(1) = RE(c6)*RE(wa1[i+1]) - IM(c6)*IM(wa1[i+1]) IM(1) = RE(c6)*IM(wa1[i+1]) + IM(c6)*RE(wa1[i+1]) #endif w1 = _mm_load_ps(&RE(wa1[i])); w3 = _mm_load_ps(&RE(wa2[i])); w5 = _mm_load_ps(&RE(wa3[i])); w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); w6 = _mm_shuffle_ps(w5, w5, _MM_SHUFFLE(2, 3, 0, 1)); m7 = _mm_mul_ps(n8, w1); m15 = _mm_mul_ps(m6, w3); m23 = _mm_mul_ps(n9, w5); m8 = _mm_mul_ps(n8, w2); m16 = _mm_mul_ps(m6, w4); m24 = _mm_mul_ps(n9, w6); m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); m17 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(2, 0, 2, 0)); m25 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(2, 0, 2, 0)); m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); m18 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(3, 1, 3, 1)); m26 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(3, 1, 3, 1)); m11 = _mm_add_ps(m9, m10); m19 = _mm_add_ps(m17, m18); m27 = _mm_add_ps(m25, m26); m12 = _mm_sub_ps(m9, m10); m20 = _mm_sub_ps(m17, m18); m28 = _mm_sub_ps(m25, m26); m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2)); m21 = _mm_shuffle_ps(m19, m19, _MM_SHUFFLE(0, 0, 3, 2)); m29 = _mm_shuffle_ps(m27, m27, _MM_SHUFFLE(0, 0, 3, 2)); m14 = _mm_unpacklo_ps(m12, m13); m22 = _mm_unpacklo_ps(m20, m21); m30 = _mm_unpacklo_ps(m28, m29); _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14); _mm_store_ps(&RE(ch[ah+i+2*l1*ido]), m22); _mm_store_ps(&RE(ch[ah+i+3*l1*ido]), m30); } } } #endif static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3) { uint16_t i, k, ac, ah; if (ido == 1) { for (k = 0; k < l1; k++) { complex_t t1, t2, t3, t4; ac = 4*k; ah = k; RE(t2) = RE(cc[ac]) + RE(cc[ac+2]); RE(t1) = RE(cc[ac]) - RE(cc[ac+2]); IM(t2) = IM(cc[ac]) + IM(cc[ac+2]); IM(t1) = IM(cc[ac]) - IM(cc[ac+2]); RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]); IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]); IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]); RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]); RE(ch[ah]) = RE(t2) + RE(t3); RE(ch[ah+2*l1]) = RE(t2) - RE(t3); IM(ch[ah]) = IM(t2) + IM(t3); IM(ch[ah+2*l1]) = IM(t2) - IM(t3); RE(ch[ah+l1]) = RE(t1) + RE(t4); RE(ch[ah+3*l1]) = RE(t1) - RE(t4); IM(ch[ah+l1]) = IM(t1) + IM(t4); IM(ch[ah+3*l1]) = IM(t1) - IM(t4); } } else { for (k = 0; k < l1; k++) { ac = 4*k*ido; ah = k*ido; for (i = 0; i < ido; i++) { complex_t c2, c3, c4, t1, t2, t3, t4; RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]); RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]); IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]); IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]); RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]); IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]); IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]); RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]); RE(c2) = RE(t1) + RE(t4); RE(c4) = RE(t1) - RE(t4); IM(c2) = IM(t1) + IM(t4); IM(c4) = IM(t1) - IM(t4); RE(ch[ah+i]) = RE(t2) + RE(t3); RE(c3) = RE(t2) - RE(t3); IM(ch[ah+i]) = IM(t2) + IM(t3); IM(c3) = IM(t2) - IM(t3); #if 1 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), IM(c2), RE(c2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&IM(ch[ah+i+2*l1*ido]), &RE(ch[ah+i+2*l1*ido]), IM(c3), RE(c3), RE(wa2[i]), IM(wa2[i])); ComplexMult(&IM(ch[ah+i+3*l1*ido]), &RE(ch[ah+i+3*l1*ido]), IM(c4), RE(c4), RE(wa3[i]), IM(wa3[i])); #else ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), RE(c2), IM(c2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&RE(ch[ah+i+2*l1*ido]), &IM(ch[ah+i+2*l1*ido]), RE(c3), IM(c3), RE(wa2[i]), IM(wa2[i])); ComplexMult(&RE(ch[ah+i+3*l1*ido]), &IM(ch[ah+i+3*l1*ido]), RE(c4), IM(c4), RE(wa3[i]), IM(wa3[i])); #endif } } } } static void passf4neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3) { uint16_t i, k, ac, ah; if (ido == 1) { for (k = 0; k < l1; k++) { complex_t t1, t2, t3, t4; ac = 4*k; ah = k; RE(t2) = RE(cc[ac]) + RE(cc[ac+2]); RE(t1) = RE(cc[ac]) - RE(cc[ac+2]); IM(t2) = IM(cc[ac]) + IM(cc[ac+2]); IM(t1) = IM(cc[ac]) - IM(cc[ac+2]); RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]); IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]); IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]); RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]); RE(ch[ah]) = RE(t2) + RE(t3); RE(ch[ah+2*l1]) = RE(t2) - RE(t3); IM(ch[ah]) = IM(t2) + IM(t3); IM(ch[ah+2*l1]) = IM(t2) - IM(t3); RE(ch[ah+l1]) = RE(t1) - RE(t4); RE(ch[ah+3*l1]) = RE(t1) + RE(t4); IM(ch[ah+l1]) = IM(t1) - IM(t4); IM(ch[ah+3*l1]) = IM(t1) + IM(t4); } } else { for (k = 0; k < l1; k++) { ac = 4*k*ido; ah = k*ido; for (i = 0; i < ido; i++) { complex_t c2, c3, c4, t1, t2, t3, t4; RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]); RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]); IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]); IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]); RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]); IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]); IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]); RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]); RE(c2) = RE(t1) - RE(t4); RE(c4) = RE(t1) + RE(t4); IM(c2) = IM(t1) - IM(t4); IM(c4) = IM(t1) + IM(t4); RE(ch[ah+i]) = RE(t2) + RE(t3); RE(c3) = RE(t2) - RE(t3); IM(ch[ah+i]) = IM(t2) + IM(t3); IM(c3) = IM(t2) - IM(t3); #if 1 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), RE(c2), IM(c2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&RE(ch[ah+i+2*l1*ido]), &IM(ch[ah+i+2*l1*ido]), RE(c3), IM(c3), RE(wa2[i]), IM(wa2[i])); ComplexMult(&RE(ch[ah+i+3*l1*ido]), &IM(ch[ah+i+3*l1*ido]), RE(c4), IM(c4), RE(wa3[i]), IM(wa3[i])); #else ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), IM(c2), RE(c2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&IM(ch[ah+i+2*l1*ido]), &RE(ch[ah+i+2*l1*ido]), IM(c3), RE(c3), RE(wa2[i]), IM(wa2[i])); ComplexMult(&IM(ch[ah+i+3*l1*ido]), &RE(ch[ah+i+3*l1*ido]), IM(c4), RE(c4), RE(wa3[i]), IM(wa3[i])); #endif } } } } static void passf5(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3, const complex_t *wa4, const int8_t isign) { static real_t tr11 = FRAC_CONST(0.309016994374947); static real_t ti11 = FRAC_CONST(0.951056516295154); static real_t tr12 = FRAC_CONST(-0.809016994374947); static real_t ti12 = FRAC_CONST(0.587785252292473); uint16_t i, k, ac, ah; complex_t c2, c3, c4, c5, d3, d4, d5, d2, t2, t3, t4, t5; if (ido == 1) { if (isign == 1) { for (k = 0; k < l1; k++) { ac = 5*k + 1; ah = k; RE(t2) = RE(cc[ac]) + RE(cc[ac+3]); IM(t2) = IM(cc[ac]) + IM(cc[ac+3]); RE(t3) = RE(cc[ac+1]) + RE(cc[ac+2]); IM(t3) = IM(cc[ac+1]) + IM(cc[ac+2]); RE(t4) = RE(cc[ac+1]) - RE(cc[ac+2]); IM(t4) = IM(cc[ac+1]) - IM(cc[ac+2]); RE(t5) = RE(cc[ac]) - RE(cc[ac+3]); IM(t5) = IM(cc[ac]) - IM(cc[ac+3]); RE(ch[ah]) = RE(cc[ac-1]) + RE(t2) + RE(t3); IM(ch[ah]) = IM(cc[ac-1]) + IM(t2) + IM(t3); RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); RE(c3) = RE(cc[ac-1]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); IM(c3) = IM(cc[ac-1]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); ComplexMult(&RE(c5), &RE(c4), ti11, ti12, RE(t5), RE(t4)); ComplexMult(&IM(c5), &IM(c4), ti11, ti12, IM(t5), IM(t4)); RE(ch[ah+l1]) = RE(c2) - IM(c5); IM(ch[ah+l1]) = IM(c2) + RE(c5); RE(ch[ah+2*l1]) = RE(c3) - IM(c4); IM(ch[ah+2*l1]) = IM(c3) + RE(c4); RE(ch[ah+3*l1]) = RE(c3) + IM(c4); IM(ch[ah+3*l1]) = IM(c3) - RE(c4); RE(ch[ah+4*l1]) = RE(c2) + IM(c5); IM(ch[ah+4*l1]) = IM(c2) - RE(c5); } } else { for (k = 0; k < l1; k++) { ac = 5*k + 1; ah = k; RE(t2) = RE(cc[ac]) + RE(cc[ac+3]); IM(t2) = IM(cc[ac]) + IM(cc[ac+3]); RE(t3) = RE(cc[ac+1]) + RE(cc[ac+2]); IM(t3) = IM(cc[ac+1]) + IM(cc[ac+2]); RE(t4) = RE(cc[ac+1]) - RE(cc[ac+2]); IM(t4) = IM(cc[ac+1]) - IM(cc[ac+2]); RE(t5) = RE(cc[ac]) - RE(cc[ac+3]); IM(t5) = IM(cc[ac]) - IM(cc[ac+3]); RE(ch[ah]) = RE(cc[ac-1]) + RE(t2) + RE(t3); IM(ch[ah]) = IM(cc[ac-1]) + IM(t2) + IM(t3); RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); RE(c3) = RE(cc[ac-1]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); IM(c3) = IM(cc[ac-1]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); ComplexMult(&RE(c4), &RE(c5), ti12, ti11, RE(t5), RE(t4)); ComplexMult(&IM(c4), &IM(c5), ti12, ti12, IM(t5), IM(t4)); RE(ch[ah+l1]) = RE(c2) + IM(c5); IM(ch[ah+l1]) = IM(c2) - RE(c5); RE(ch[ah+2*l1]) = RE(c3) + IM(c4); IM(ch[ah+2*l1]) = IM(c3) - RE(c4); RE(ch[ah+3*l1]) = RE(c3) - IM(c4); IM(ch[ah+3*l1]) = IM(c3) + RE(c4); RE(ch[ah+4*l1]) = RE(c2) - IM(c5); IM(ch[ah+4*l1]) = IM(c2) + RE(c5); } } } else { if (isign == 1) { for (k = 0; k < l1; k++) { for (i = 0; i < ido; i++) { ac = i + (k*5 + 1) * ido; ah = i + k * ido; RE(t2) = RE(cc[ac]) + RE(cc[ac+3*ido]); IM(t2) = IM(cc[ac]) + IM(cc[ac+3*ido]); RE(t3) = RE(cc[ac+ido]) + RE(cc[ac+2*ido]); IM(t3) = IM(cc[ac+ido]) + IM(cc[ac+2*ido]); RE(t4) = RE(cc[ac+ido]) - RE(cc[ac+2*ido]); IM(t4) = IM(cc[ac+ido]) - IM(cc[ac+2*ido]); RE(t5) = RE(cc[ac]) - RE(cc[ac+3*ido]); IM(t5) = IM(cc[ac]) - IM(cc[ac+3*ido]); RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2) + RE(t3); IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2) + IM(t3); RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); RE(c3) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); IM(c3) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); ComplexMult(&RE(c5), &RE(c4), ti11, ti12, RE(t5), RE(t4)); ComplexMult(&IM(c5), &IM(c4), ti11, ti12, IM(t5), IM(t4)); IM(d2) = IM(c2) + RE(c5); IM(d3) = IM(c3) + RE(c4); RE(d4) = RE(c3) + IM(c4); RE(d5) = RE(c2) + IM(c5); RE(d2) = RE(c2) - IM(c5); IM(d5) = IM(c2) - RE(c5); RE(d3) = RE(c3) - IM(c4); IM(d4) = IM(c3) - RE(c4); #if 1 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); ComplexMult(&IM(ch[ah+3*l1*ido]), &RE(ch[ah+3*l1*ido]), IM(d4), RE(d4), RE(wa3[i]), IM(wa3[i])); ComplexMult(&IM(ch[ah+4*l1*ido]), &RE(ch[ah+4*l1*ido]), IM(d5), RE(d5), RE(wa4[i]), IM(wa4[i])); #else ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); ComplexMult(&RE(ch[ah+3*l1*ido]), &IM(ch[ah+3*l1*ido]), RE(d4), IM(d4), RE(wa3[i]), IM(wa3[i])); ComplexMult(&RE(ch[ah+4*l1*ido]), &IM(ch[ah+4*l1*ido]), RE(d5), IM(d5), RE(wa4[i]), IM(wa4[i])); #endif } } } else { for (k = 0; k < l1; k++) { for (i = 0; i < ido; i++) { ac = i + (k*5 + 1) * ido; ah = i + k * ido; RE(t2) = RE(cc[ac]) + RE(cc[ac+3*ido]); IM(t2) = IM(cc[ac]) + IM(cc[ac+3*ido]); RE(t3) = RE(cc[ac+ido]) + RE(cc[ac+2*ido]); IM(t3) = IM(cc[ac+ido]) + IM(cc[ac+2*ido]); RE(t4) = RE(cc[ac+ido]) - RE(cc[ac+2*ido]); IM(t4) = IM(cc[ac+ido]) - IM(cc[ac+2*ido]); RE(t5) = RE(cc[ac]) - RE(cc[ac+3*ido]); IM(t5) = IM(cc[ac]) - IM(cc[ac+3*ido]); RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2) + RE(t3); IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2) + IM(t3); RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); RE(c3) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); IM(c3) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); ComplexMult(&RE(c4), &RE(c5), ti12, ti11, RE(t5), RE(t4)); ComplexMult(&IM(c4), &IM(c5), ti12, ti12, IM(t5), IM(t4)); IM(d2) = IM(c2) - RE(c5); IM(d3) = IM(c3) - RE(c4); RE(d4) = RE(c3) - IM(c4); RE(d5) = RE(c2) - IM(c5); RE(d2) = RE(c2) + IM(c5); IM(d5) = IM(c2) + RE(c5); RE(d3) = RE(c3) + IM(c4); IM(d4) = IM(c3) + RE(c4); #if 1 ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); ComplexMult(&RE(ch[ah+3*l1*ido]), &IM(ch[ah+3*l1*ido]), RE(d4), IM(d4), RE(wa3[i]), IM(wa3[i])); ComplexMult(&RE(ch[ah+4*l1*ido]), &IM(ch[ah+4*l1*ido]), RE(d5), IM(d5), RE(wa4[i]), IM(wa4[i])); #else ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); ComplexMult(&IM(ch[ah+3*l1*ido]), &RE(ch[ah+3*l1*ido]), IM(d4), RE(d4), RE(wa3[i]), IM(wa3[i])); ComplexMult(&IM(ch[ah+4*l1*ido]), &RE(ch[ah+4*l1*ido]), IM(d5), RE(d5), RE(wa4[i]), IM(wa4[i])); #endif } } } } } /*---------------------------------------------------------------------- cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs. ----------------------------------------------------------------------*/ #ifdef USE_SSE #define CONV(A,B,C) ( (A<<2) | ((B & 0x1)<<1) | ((C==1)&0x1) ) static INLINE void cfftf1pos_sse(uint16_t n, complex_t *c, complex_t *ch, const uint16_t *ifac, const complex_t *wa, const int8_t isign) { uint16_t i; uint16_t k1, l1, l2; uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; nf = ifac[1]; na = 0; l1 = 1; iw = 0; for (k1 = 2; k1 <= nf+1; k1++) { ip = ifac[k1]; l2 = ip*l1; ido = n / l2; idl1 = ido*l1; ix2 = iw + ido; ix3 = ix2 + ido; ix4 = ix3 + ido; switch (CONV(ip,na,ido)) { case CONV(4,0,0): //passf4pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); break; case CONV(4,0,1): passf4pos_sse((const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); break; case CONV(4,1,0): passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); //passf4pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); break; case CONV(4,1,1): passf4pos_sse((const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); break; case CONV(2,0,0): passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); //passf2pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); break; case CONV(2,0,1): passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); //passf2pos_sse((const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); break; case CONV(2,1,0): passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); //passf2pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); break; case CONV(2,1,1): passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); //passf2pos_sse((const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); break; case CONV(3,0,0): case CONV(3,0,1): passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign); break; case CONV(3,1,0): case CONV(3,1,1): passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign); break; case CONV(5,0,0): case CONV(5,0,1): passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); break; case CONV(5,1,0): case CONV(5,1,1): passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); break; } na = 1 - na; l1 = l2; iw += (ip-1) * ido; } if (na == 0) return; for (i = 0; i < n; i++) { RE(c[i]) = RE(ch[i]); IM(c[i]) = IM(ch[i]); } } #endif static INLINE void cfftf1pos(uint16_t n, complex_t *c, complex_t *ch, const uint16_t *ifac, const complex_t *wa, const int8_t isign) { uint16_t i; uint16_t k1, l1, l2; uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; nf = ifac[1]; na = 0; l1 = 1; iw = 0; for (k1 = 2; k1 <= nf+1; k1++) { ip = ifac[k1]; l2 = ip*l1; ido = n / l2; idl1 = ido*l1; switch (ip) { case 4: ix2 = iw + ido; ix3 = ix2 + ido; if (na == 0) passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); else passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); na = 1 - na; break; case 2: if (na == 0) passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); else passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); na = 1 - na; break; case 3: ix2 = iw + ido; if (na == 0) passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign); else passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign); na = 1 - na; break; case 5: ix2 = iw + ido; ix3 = ix2 + ido; ix4 = ix3 + ido; if (na == 0) passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); else passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); na = 1 - na; break; } l1 = l2; iw += (ip-1) * ido; } if (na == 0) return; for (i = 0; i < n; i++) { RE(c[i]) = RE(ch[i]); IM(c[i]) = IM(ch[i]); } } static INLINE void cfftf1neg(uint16_t n, complex_t *c, complex_t *ch, const uint16_t *ifac, const complex_t *wa, const int8_t isign) { uint16_t i; uint16_t k1, l1, l2; uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; nf = ifac[1]; na = 0; l1 = 1; iw = 0; for (k1 = 2; k1 <= nf+1; k1++) { ip = ifac[k1]; l2 = ip*l1; ido = n / l2; idl1 = ido*l1; switch (ip) { case 4: ix2 = iw + ido; ix3 = ix2 + ido; if (na == 0) passf4neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); else passf4neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); na = 1 - na; break; case 2: if (na == 0) passf2neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); else passf2neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); na = 1 - na; break; case 3: ix2 = iw + ido; if (na == 0) passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign); else passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign); na = 1 - na; break; case 5: ix2 = iw + ido; ix3 = ix2 + ido; ix4 = ix3 + ido; if (na == 0) passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); else passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); na = 1 - na; break; } l1 = l2; iw += (ip-1) * ido; } if (na == 0) return; for (i = 0; i < n; i++) { RE(c[i]) = RE(ch[i]); IM(c[i]) = IM(ch[i]); } } void cfftf(cfft_info *cfft, complex_t *c) { cfftf1neg(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, -1); } void cfftb(cfft_info *cfft, complex_t *c) { cfftf1pos(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1); } #ifdef USE_SSE void cfftb_sse(cfft_info *cfft, complex_t *c) { cfftf1pos_sse(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1); } #endif static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac) { static uint16_t ntryh[4] = {3, 4, 2, 5}; #ifndef FIXED_POINT real_t arg, argh, argld, fi; uint16_t ido, ipm; uint16_t i1, k1, l1, l2; uint16_t ld, ii, ip; #endif uint16_t ntry = 0, i, j; uint16_t ib; uint16_t nf, nl, nq, nr; nl = n; nf = 0; j = 0; startloop: j++; if (j <= 4) ntry = ntryh[j-1]; else ntry += 2; do { nq = nl / ntry; nr = nl - ntry*nq; if (nr != 0) goto startloop; nf++; ifac[nf+1] = ntry; nl = nq; if (ntry == 2 && nf != 1) { for (i = 2; i <= nf; i++) { ib = nf - i + 2; ifac[ib+1] = ifac[ib]; } ifac[2] = 2; } } while (nl != 1); ifac[0] = n; ifac[1] = nf; #ifndef FIXED_POINT argh = (real_t)2.0*(real_t)M_PI / (real_t)n; i = 0; l1 = 1; for (k1 = 1; k1 <= nf; k1++) { ip = ifac[k1+1]; ld = 0; l2 = l1*ip; ido = n / l2; ipm = ip - 1; for (j = 0; j < ipm; j++) { i1 = i; RE(wa[i]) = 1.0; IM(wa[i]) = 0.0; ld += l1; fi = 0; argld = ld*argh; for (ii = 0; ii < ido; ii++) { i++; fi++; arg = fi * argld; RE(wa[i]) = (real_t)cos(arg); #if 1 IM(wa[i]) = (real_t)sin(arg); #else IM(wa[i]) = (real_t)-sin(arg); #endif } if (ip > 5) { RE(wa[i1]) = RE(wa[i]); IM(wa[i1]) = IM(wa[i]); } } l1 = l2; } #endif } cfft_info *cffti(uint16_t n) { cfft_info *cfft = (cfft_info*)faad_malloc(sizeof(cfft_info)); cfft->n = n; cfft->work = (complex_t*)faad_malloc(n*sizeof(complex_t)); #ifndef FIXED_POINT cfft->tab = (complex_t*)faad_malloc(n*sizeof(complex_t)); cffti1(n, cfft->tab, cfft->ifac); #else cffti1(n, NULL, cfft->ifac); switch (n) { case 64: cfft->tab = cfft_tab_64; break; case 512: cfft->tab = cfft_tab_512; break; #ifdef LD_DEC case 256: cfft->tab = cfft_tab_256; break; #endif #ifdef ALLOW_SMALL_FRAMELENGTH case 60: cfft->tab = cfft_tab_60; break; case 480: cfft->tab = cfft_tab_480; break; #ifdef LD_DEC case 240: cfft->tab = cfft_tab_240; break; #endif #endif } #endif return cfft; } void cfftu(cfft_info *cfft) { if (cfft->work) faad_free(cfft->work); #ifndef FIXED_POINT if (cfft->tab) faad_free(cfft->tab); #endif if (cfft) faad_free(cfft); }