ref: 9a9fa10c760b59e48c567d408de3878498755f84
dir: /libfaad/cfft.c/
/* ** FAAD - Freeware Advanced Audio Decoder ** Copyright (C) 2002 M. Bakker ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by ** the Free Software Foundation; either version 2 of the License, or ** (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ** ** $Id: cfft.c,v 1.1 2002/07/14 19:11:11 menno Exp $ **/ /* * Algorithmically based on Fortran-77 FFTPACK * by Paul N. Swarztrauber(Version 4, 1985). */ /* isign is +1 for backward and -1 for forward transforms */ #include "common.h" #include <stdlib.h> #include "cfft.h" /*---------------------------------------------------------------------- passf2, passf3, passf4, passf5, passf. Complex FFT passes fwd and bwd. ----------------------------------------------------------------------*/ static void passf2(uint16_t ido, uint16_t l1, real_t *cc, real_t *ch, real_t *wa1, int8_t isign) { uint16_t i, k, ah, ac; real_t ti2, tr2; if (ido <= 2) { for (k = 0; k < l1; k++) { ah = k*ido; ac = 2*k*ido; ch[ah] = cc[ac] + cc[ac+ido]; ch[ah+ido*l1] = cc[ac] - cc[ac+ido]; ch[ah+1] = cc[ac+1] + cc[ac+ido+1]; ch[ah+ido*l1+1] = cc[ac+1] - cc[ac+ido+1]; } } else { for (k = 0; k < l1; k++) { for(i = 0; i < ido-1; i += 2) { ah = i + k*ido; ac = i + 2*k*ido; ch[ah] = cc[ac] + cc[ac+ido]; tr2 = cc[ac] - cc[ac+ido]; ch[ah+1] = cc[ac+1] + cc[ac+1+ido]; ti2 = cc[ac+1] - cc[ac+1+ido]; ch[ah+l1*ido+1] = wa1[i]*ti2 + isign*wa1[i+1]*tr2; ch[ah+l1*ido] = wa1[i]*tr2 - isign*wa1[i+1]*ti2; } } } } static void passf3(uint16_t ido, uint16_t l1, real_t *cc, real_t *ch, real_t *wa1, real_t *wa2, int8_t isign) { static real_t taur = -0.5; static real_t taui = 0.866025403784439; uint16_t i, k, ac, ah; real_t ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2; if (ido == 2) { for (k = 1; k <= l1; k++) { ac = (3*k-2) * ido; tr2 = cc[ac] + cc[ac+ido]; cr2 = cc[ac-ido] + taur*tr2; ah = (k-1) * ido; ch[ah] = cc[ac-ido] + tr2; ti2 = cc[ac+1] + cc[ac+ido+1]; ci2 = cc[ac-ido+1] + taur*ti2; ch[ah+1] = cc[ac-ido+1] + ti2; cr3 = isign * taui * (cc[ac] - cc[ac+ido]); ci3 = isign * taui * (cc[ac+1] - cc[ac+ido+1]); ch[ah+l1*ido] = cr2 - ci3; ch[ah+2*l1*ido] = cr2 + ci3; ch[ah+l1*ido+1] = ci2 + cr3; ch[ah+2*l1*ido+1] = ci2 - cr3; } } else { for (k = 1; k <= l1; k++) { for (i = 0; i < ido-1; i += 2) { ac = i + (3*k-2) * ido; tr2 = cc[ac] + cc[ac+ido]; cr2 = cc[ac-ido] + taur*tr2; ah = i + (k-1) * ido; ch[ah] = cc[ac-ido] + tr2; ti2 = cc[ac+1] + cc[ac+ido+1]; ci2 = cc[ac-ido+1] + taur*ti2; ch[ah+1] = cc[ac-ido+1] + ti2; cr3 = isign * taui * (cc[ac] - cc[ac+ido]); ci3 = isign * taui * (cc[ac+1] - cc[ac+ido+1]); dr2 = cr2 - ci3; dr3 = cr2 + ci3; di2 = ci2 + cr3; di3 = ci2 - cr3; ch[ah+l1*ido+1] = wa1[i]*di2 + isign*wa1[i+1]*dr2; ch[ah+l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2; ch[ah+2*l1*ido+1] = wa2[i]*di3 + isign*wa2[i+1]*dr3; ch[ah+2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3; } } } } static void passf4(uint16_t ido, uint16_t l1, real_t *cc, real_t *ch, real_t *wa1, real_t *wa2, real_t *wa3, int8_t isign) { uint16_t i, k, ac, ah; real_t ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; if (ido == 2) { for (k = 0; k < l1; k++) { ac = 4*k*ido + 1; ti1 = cc[ac] - cc[ac+2*ido]; ti2 = cc[ac] + cc[ac+2*ido]; tr4 = cc[ac+3*ido] - cc[ac+ido]; ti3 = cc[ac+ido] + cc[ac+3*ido]; tr1 = cc[ac-1] - cc[ac+2*ido-1]; tr2 = cc[ac-1] + cc[ac+2*ido-1]; ti4 = cc[ac+ido-1] - cc[ac+3*ido-1]; tr3 = cc[ac+ido-1] + cc[ac+3*ido-1]; ah = k*ido; ch[ah] = tr2 + tr3; ch[ah+2*l1*ido] = tr2 - tr3; ch[ah+1] = ti2 + ti3; ch[ah+2*l1*ido+1] = ti2 - ti3; ch[ah+l1*ido] = tr1 + isign*tr4; ch[ah+3*l1*ido] = tr1 - isign*tr4; ch[ah+l1*ido+1] = ti1 + isign*ti4; ch[ah+3*l1*ido+1] = ti1 - isign*ti4; } } else { for (k = 0; k < l1; k++) { for (i = 0; i < ido-1; i += 2) { ac = i + 1 + 4*k*ido; ti1 = cc[ac] - cc[ac+2*ido]; ti2 = cc[ac] + cc[ac+2*ido]; ti3 = cc[ac+ido] + cc[ac+3*ido]; tr4 = cc[ac+3*ido] - cc[ac+ido]; tr1 = cc[ac-1] - cc[ac+2*ido-1]; tr2 = cc[ac-1] + cc[ac+2*ido-1]; ti4 = cc[ac+ido-1] - cc[ac+3*ido-1]; tr3 = cc[ac+ido-1] + cc[ac+3*ido-1]; ah = i + k*ido; ch[ah] = tr2 + tr3; cr3 = tr2 - tr3; ch[ah+1] = ti2 + ti3; ci3 = ti2 - ti3; cr2 = tr1 + isign*tr4; cr4 = tr1 - isign*tr4; ci2 = ti1 + isign*ti4; ci4 = ti1 - isign*ti4; ch[ah+l1*ido] = wa1[i]*cr2 - isign*wa1[i+1]*ci2; ch[ah+l1*ido+1] = wa1[i]*ci2 + isign*wa1[i+1]*cr2; ch[ah+2*l1*ido] = wa2[i]*cr3 - isign*wa2[i+1]*ci3; ch[ah+2*l1*ido+1] = wa2[i]*ci3 + isign*wa2[i+1]*cr3; ch[ah+3*l1*ido] = wa3[i]*cr4 - isign*wa3[i+1]*ci4; ch[ah+3*l1*ido+1] = wa3[i]*ci4 + isign*wa3[i+1]*cr4; } } } } static void passf5(uint16_t ido, uint16_t l1, real_t *cc, real_t *ch, real_t *wa1, real_t *wa2, real_t *wa3, real_t *wa4, int8_t isign) { static real_t tr11 = 0.309016994374947; static real_t ti11 = 0.951056516295154; static real_t tr12 = -0.809016994374947; static real_t ti12 = 0.587785252292473; uint16_t i, k, ac, ah; real_t ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5; if (ido == 2) { for (k = 1; k <= l1; ++k) { ac = (5*k-4) * ido + 1; ti5 = cc[ac] - cc[ac+3*ido]; ti2 = cc[ac] + cc[ac+3*ido]; ti4 = cc[ac+ido] - cc[ac+2*ido]; ti3 = cc[ac+ido] + cc[ac+2*ido]; tr5 = cc[ac-1] - cc[ac+3*ido-1]; tr2 = cc[ac-1] + cc[ac+3*ido-1]; tr4 = cc[ac+ido-1] - cc[ac+2*ido-1]; tr3 = cc[ac+ido-1] + cc[ac+2*ido-1]; ah = (k-1) * ido; ch[ah] = cc[ac-ido-1] + tr2 + tr3; ch[ah+1] = cc[ac-ido] + ti2 + ti3; cr2 = cc[ac-ido-1] + tr11*tr2 + tr12*tr3; ci2 = cc[ac-ido] + tr11*ti2 + tr12*ti3; cr3 = cc[ac-ido-1] + tr12*tr2 + tr11*tr3; ci3 = cc[ac-ido] + tr12*ti2 + tr11*ti3; cr5 = isign * (ti11*tr5 + ti12*tr4); ci5 = isign * (ti11*ti5 + ti12*ti4); cr4 = isign * (ti12*tr5 - ti11*tr4); ci4 = isign * (ti12*ti5 - ti11*ti4); ch[ah+l1*ido] = cr2 - ci5; ch[ah+4*l1*ido] = cr2 + ci5; ch[ah+l1*ido+1] = ci2 + cr5; ch[ah+2*l1*ido+1]=ci3 + cr4; ch[ah+2*l1*ido] = cr3 - ci4; ch[ah+3*l1*ido] = cr3 + ci4; ch[ah+3*l1*ido+1] = ci3 - cr4; ch[ah+4*l1*ido+1] = ci2 - cr5; } } else { for (k = 1; k <= l1; k++) { for (i = 0; i < ido-1; i += 2) { ac = i + 1 + (k*5-4) * ido; ti5 = cc[ac] - cc[ac+3*ido]; ti2 = cc[ac] + cc[ac+3*ido]; ti4 = cc[ac+ido] - cc[ac+2*ido]; ti3 = cc[ac+ido] + cc[ac+2*ido]; tr5 = cc[ac-1] - cc[ac+3*ido-1]; tr2 = cc[ac-1] + cc[ac+3*ido-1]; tr4 = cc[ac+ido-1] - cc[ac+2*ido-1]; tr3 = cc[ac+ido-1] + cc[ac+2*ido-1]; ah = i + (k-1) * ido; ch[ah] = cc[ac-ido-1] + tr2 + tr3; ch[ah+1] = cc[ac-ido] + ti2 + ti3; cr2 = cc[ac-ido-1] + tr11*tr2 + tr12*tr3; ci2 = cc[ac-ido] + tr11*ti2 + tr12*ti3; cr3 = cc[ac-ido-1] + tr12*tr2 + tr11*tr3; ci3 = cc[ac-ido] + tr12*ti2 + tr11*ti3; cr5 = isign * (ti11*tr5 + ti12*tr4); ci5 = isign * (ti11*ti5 + ti12*ti4); cr4 = isign * (ti12*tr5 - ti11*tr4); ci4 = isign * (ti12*ti5 - ti11*ti4); dr3 = cr3 - ci4; dr4 = cr3 + ci4; di3 = ci3 + cr4; di4 = ci3 - cr4; dr5 = cr2 + ci5; dr2 = cr2 - ci5; di5 = ci2 - cr5; di2 = ci2 + cr5; ch[ah+l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2; ch[ah+l1*ido+1] = wa1[i]*di2 + isign*wa1[i+1]*dr2; ch[ah+2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3; ch[ah+2*l1*ido+1] = wa2[i]*di3 + isign*wa2[i+1]*dr3; ch[ah+3*l1*ido] = wa3[i]*dr4 - isign*wa3[i+1]*di4; ch[ah+3*l1*ido+1] = wa3[i]*di4 + isign*wa3[i+1]*dr4; ch[ah+4*l1*ido] = wa4[i]*dr5 - isign*wa4[i+1]*di5; ch[ah+4*l1*ido+1] = wa4[i]*di5 + isign*wa4[i+1]*dr5; } } } } static void passf(uint16_t *nac, uint16_t ido, uint16_t ip, uint16_t l1, uint16_t idl1, real_t *cc, real_t *ch, real_t *wa, int8_t isign) { uint16_t idij, idlj, idot, ipph, i, j, k, l, jc, lc, ik, nt, idj, idl; uint16_t inc, idp; real_t wai, war; idot = ido / 2; nt = ip*idl1; ipph = (ip+1) / 2; idp = ip*ido; if (ido >= l1) { for (j = 1; j < ipph; j++) { jc = ip - j; for (k = 0; k < l1; k++) { for (i = 0; i < ido; i++) { ch[i+(k+j*l1)*ido] = cc[i+(j+k*ip)*ido] + cc[i+(jc+k*ip)*ido]; ch[i+(k+jc*l1)*ido] = cc[i+(j+k*ip)*ido] - cc[i+(jc+k*ip)*ido]; } } } for (k = 0; k < l1; k++) { for (i = 0; i < ido; i++) ch[i+k*ido] = cc[i+k*ip*ido]; } } else { for (j = 1; j < ipph; j++) { jc = ip - j; for (i = 0; i < ido; i++) { for (k = 0; k < l1; k++) { ch[i+(k+j*l1)*ido] = cc[i+(j+k*ip)*ido] + cc[i+(jc+k*ip)*ido]; ch[i+(k+jc*l1)*ido] = cc[i+(j+k*ip)*ido] - cc[i+(jc+k*ip)*ido]; } } } for (i = 0; i < ido; i++) { for (k = 0; k < l1; k++) ch[i+k*ido] = cc[i+k*ip*ido]; } } idl = 2 - ido; inc = 0; for (l = 1; l < ipph; l++) { lc = ip - l; idl += ido; for (ik = 0; ik < idl1; ik++) { cc[ik+l*idl1] = ch[ik] + wa[idl-2]*ch[ik+idl1]; cc[ik+lc*idl1] = isign*wa[idl-1]*ch[ik+(ip-1)*idl1]; } idlj = idl; inc += ido; for (j = 2; j < ipph; j++) { jc = ip - j; idlj += inc; if (idlj > idp) idlj -= idp; war = wa[idlj-2]; wai = wa[idlj-1]; for (ik = 0; ik < idl1; ik++) { cc[ik+l*idl1] += war*ch[ik+j*idl1]; cc[ik+lc*idl1] += isign*wai*ch[ik+jc*idl1]; } } } for (j = 1; j < ipph; j++) { for (ik = 0; ik < idl1; ik++) ch[ik] += ch[ik+j*idl1]; } for (j = 1; j < ipph; j++) { jc = ip - j; for (ik = 1; ik < idl1; ik += 2) { ch[ik-1+j*idl1] = cc[ik-1+j*idl1] - cc[ik+jc*idl1]; ch[ik-1+jc*idl1] = cc[ik-1+j*idl1] + cc[ik+jc*idl1]; ch[ik+j*idl1] = cc[ik+j*idl1] + cc[ik-1+jc*idl1]; ch[ik+jc*idl1] = cc[ik+j*idl1] - cc[ik-1+jc*idl1]; } } *nac = 1; if (ido == 2) return; *nac = 0; for (ik = 0; ik < idl1; ik++) cc[ik] = ch[ik]; for (j = 1; j < ip; j++) { for (k = 0; k < l1; k++) { cc[(k+j*l1)*ido+0] = ch[(k+j*l1)*ido+0]; cc[(k+j*l1)*ido+1] = ch[(k+j*l1)*ido+1]; } } if (idot <= l1) { idij = 0; for (j = 1; j < ip; j++) { idij += 2; for (i = 3; i < ido; i += 2) { idij += 2; for (k = 0; k < l1; k++) { cc[i-1+(k+j*l1)*ido] = wa[idij-2] * ch[i-1+(k+j*l1)*ido] - isign * wa[idij-1] * ch[i+(k+j*l1)*ido]; cc[i+(k+j*l1)*ido] = wa[idij-2] * ch[i+(k+j*l1)*ido] + isign * wa[idij-1] * ch[i-1+(k+j*l1)*ido]; } } } } else { idj = 2 - ido; for (j = 1; j < ip; j++) { idj += ido; for (k = 0; k < l1; k++) { idij = idj; for (i = 3; i < ido; i += 2) { idij += 2; cc[i-1+(k+j*l1)*ido] = wa[idij-2] * ch[i-1+(k+j*l1)*ido] - isign * wa[idij-1] * ch[i+(k+j*l1)*ido]; cc[i+(k+j*l1)*ido] = wa[idij-2] * ch[i+(k+j*l1)*ido] + isign * wa[idij-1] * ch[i-1+(k+j*l1)*ido]; } } } } } /*---------------------------------------------------------------------- cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs. ----------------------------------------------------------------------*/ INLINE void cfftf1(uint16_t n, real_t *c, real_t *ch, real_t *wa, uint16_t *ifac, int8_t isign) { uint16_t idot, i; uint16_t k1, l1, l2; uint16_t na, nf, ip, iw, ix2, ix3, ix4, nac, ido, idl1; nf = ifac[1]; na = 0; l1 = 1; iw = 0; for (k1 = 2; k1 <= nf+1; k1++) { ip = ifac[k1]; l2 = ip*l1; ido = n / l2; idot = ido+ido; idl1 = idot*l1; switch (ip) { case 2: if (na == 0) passf2(idot, l1, c, ch, &wa[iw], isign); else passf2(idot, l1, ch, c, &wa[iw], isign); na = 1 - na; break; case 3: ix2 = iw + idot; if (na == 0) passf3(idot, l1, c, ch, &wa[iw], &wa[ix2], isign); else passf3(idot, l1, ch, c, &wa[iw], &wa[ix2], isign); na = 1 - na; break; case 4: ix2 = iw + idot; ix3 = ix2 + idot; if (na == 0) passf4(idot, l1, c, ch, &wa[iw], &wa[ix2], &wa[ix3], isign); else passf4(idot, l1, ch, c, &wa[iw], &wa[ix2], &wa[ix3], isign); na = 1 - na; break; case 5: ix2 = iw + idot; ix3 = ix2 + idot; ix4 = ix3 + idot; if (na == 0) passf5(idot, l1, c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); else passf5(idot, l1, ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); na = 1 - na; break; default: if (na == 0) passf(&nac, idot, ip, l1, idl1, c, ch, &wa[iw], isign); else passf(&nac, idot, ip, l1, idl1, ch, c, &wa[iw], isign); if (nac != 0) na = 1 - na; break; } l1 = l2; iw += (ip-1) * idot; } if (na == 0) return; for (i = 0; i < 2*n; i++) c[i] = ch[i]; } void cfftf(cfft_info cfft, real_t *c) { cfftf1(cfft.n, c, cfft.work, cfft.tab, cfft.ifac, -1); } void cfftb(cfft_info cfft, real_t *c) { cfftf1(cfft.n, c, cfft.work, cfft.tab, cfft.ifac, +1); } static void cffti1(uint16_t n, real_t *wa, uint16_t *ifac) { static uint16_t ntryh[4] = {3, 4, 2, 5}; real_t arg, argh, argld, fi; uint16_t idot, ntry, i, j; uint16_t i1, k1, l1, l2, ib; uint16_t ld, ii, nf, ip, nl, nq, nr; uint16_t ido, ipm; nl = n; nf = 0; j = 0; startloop: j++; if (j <= 4) ntry = ntryh[j-1]; else ntry += 2; do { nq = nl / ntry; nr = nl - ntry*nq; if (nr != 0) goto startloop; nf++; ifac[nf+1] = ntry; nl = nq; if (ntry == 2 && nf != 1) { for (i = 2; i <= nf; i++) { ib = nf - i + 2; ifac[ib+1] = ifac[ib]; } ifac[2] = 2; } } while (nl != 1); ifac[0] = n; ifac[1] = nf; argh = 2*M_PI / (real_t)n; i = 1; l1 = 1; for (k1 = 1; k1 <= nf; k1++) { ip = ifac[k1+1]; ld = 0; l2 = l1*ip; ido = n / l2; idot = ido + ido + 2; ipm = ip - 1; for (j = 1; j <= ipm; j++) { i1 = i; wa[i-1] = 1; wa[i] = 0; ld += l1; fi = 0; argld = ld*argh; for (ii = 4; ii <= idot; ii += 2) { i += 2; fi += 1; arg = fi*argld; wa[i-1] = cos(arg); wa[i] = sin(arg); } if (ip > 5) { wa[i1-1] = wa[i-1]; wa[i1] = wa[i]; } } l1 = l2; } } cfft_info cffti(uint16_t n) { cfft_info cfft; cfft.n = n; cfft.work = malloc(2*n*sizeof(real_t)); cfft.tab = malloc(2*n*sizeof(real_t)); cffti1(n, cfft.tab, cfft.ifac); return cfft; } void cfftu(cfft_info cfft) { if (cfft.work) free(cfft.work); if (cfft.tab) free(cfft.tab); }