ref: 1a8846b50eb297b9faa06e1273b1589a7ee833ca
dir: /libfaad/fftw/putils.c/
/* * Copyright (c) 1997-1999 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ /* * putils.c -- plan utilities shared by planner.c and rplanner.c */ /* $Id: putils.c,v 1.1 2002/04/07 21:26:05 menno Exp $ */ #ifdef FFTW_USING_CILK #include <cilk.h> #include <cilk-compat.h> #endif #include <fftw-int.h> #include <stdlib.h> #include <stdio.h> int fftw_node_cnt = 0; int fftw_plan_cnt = 0; /* * These two constants are used for the FFTW_ESTIMATE flag to help * create a heuristic plan. They don't affect FFTW_MEASURE. */ #define NOTW_OPTIMAL_SIZE 32 #define TWIDDLE_OPTIMAL_SIZE 12 #define IS_POWER_OF_TWO(n) (((n) & ((n) - 1)) == 0) /* constructors --- I wish I had ML */ fftw_plan_node *fftw_make_node(void) { fftw_plan_node *p = (fftw_plan_node *) fftw_malloc(sizeof(fftw_plan_node)); p->refcnt = 0; fftw_node_cnt++; return p; } void fftw_use_node(fftw_plan_node *p) { ++p->refcnt; } fftw_plan_node *fftw_make_node_notw(int size, const fftw_codelet_desc *config) { fftw_plan_node *p = fftw_make_node(); p->type = config->type; p->nodeu.notw.size = size; p->nodeu.notw.codelet = (fftw_notw_codelet *) config->codelet; p->nodeu.notw.codelet_desc = config; return p; } fftw_plan_node *fftw_make_node_real2hc(int size, const fftw_codelet_desc *config) { fftw_plan_node *p = fftw_make_node(); p->type = config->type; p->nodeu.real2hc.size = size; p->nodeu.real2hc.codelet = (fftw_real2hc_codelet *) config->codelet; p->nodeu.real2hc.codelet_desc = config; return p; } fftw_plan_node *fftw_make_node_hc2real(int size, const fftw_codelet_desc *config) { fftw_plan_node *p = fftw_make_node(); p->type = config->type; p->nodeu.hc2real.size = size; p->nodeu.hc2real.codelet = (fftw_hc2real_codelet *) config->codelet; p->nodeu.hc2real.codelet_desc = config; return p; } fftw_plan_node *fftw_make_node_twiddle(int n, const fftw_codelet_desc *config, fftw_plan_node *recurse, int flags) { fftw_plan_node *p = fftw_make_node(); p->type = config->type; p->nodeu.twiddle.size = config->size; p->nodeu.twiddle.codelet = (fftw_twiddle_codelet *) config->codelet; p->nodeu.twiddle.recurse = recurse; p->nodeu.twiddle.codelet_desc = config; fftw_use_node(recurse); if (flags & FFTW_MEASURE) p->nodeu.twiddle.tw = fftw_create_twiddle(n, config); else p->nodeu.twiddle.tw = 0; return p; } fftw_plan_node *fftw_make_node_hc2hc(int n, fftw_direction dir, const fftw_codelet_desc *config, fftw_plan_node *recurse, int flags) { fftw_plan_node *p = fftw_make_node(); p->type = config->type; p->nodeu.hc2hc.size = config->size; p->nodeu.hc2hc.dir = dir; p->nodeu.hc2hc.codelet = (fftw_hc2hc_codelet *) config->codelet; p->nodeu.hc2hc.recurse = recurse; p->nodeu.hc2hc.codelet_desc = config; fftw_use_node(recurse); if (flags & FFTW_MEASURE) p->nodeu.hc2hc.tw = fftw_create_twiddle(n, config); else p->nodeu.hc2hc.tw = 0; return p; } fftw_plan_node *fftw_make_node_generic(int n, int size, fftw_generic_codelet *codelet, fftw_plan_node *recurse, int flags) { fftw_plan_node *p = fftw_make_node(); p->type = FFTW_GENERIC; p->nodeu.generic.size = size; p->nodeu.generic.codelet = codelet; p->nodeu.generic.recurse = recurse; fftw_use_node(recurse); if (flags & FFTW_MEASURE) p->nodeu.generic.tw = fftw_create_twiddle(n, (const fftw_codelet_desc *) 0); else p->nodeu.generic.tw = 0; return p; } fftw_plan_node *fftw_make_node_rgeneric(int n, int size, fftw_direction dir, fftw_rgeneric_codelet *codelet, fftw_plan_node *recurse, int flags) { fftw_plan_node *p = fftw_make_node(); if (size % 2 == 0 || (n / size) % 2 == 0) fftw_die("invalid size for rgeneric codelet\n"); p->type = FFTW_RGENERIC; p->nodeu.rgeneric.size = size; p->nodeu.rgeneric.dir = dir; p->nodeu.rgeneric.codelet = codelet; p->nodeu.rgeneric.recurse = recurse; fftw_use_node(recurse); if (flags & FFTW_MEASURE) p->nodeu.rgeneric.tw = fftw_create_twiddle(n, (const fftw_codelet_desc *) 0); else p->nodeu.rgeneric.tw = 0; return p; } /* * Note that these two Rader-related things must go here, rather than * in rader.c, in order that putils.c (and rplanner.c) won't depend * upon rader.c. */ fftw_rader_data *fftw_rader_top = NULL; static void fftw_destroy_rader(fftw_rader_data * d) { if (d) { d->refcount--; if (d->refcount <= 0) { fftw_rader_data *cur = fftw_rader_top, *prev = NULL; while (cur && cur != d) { prev = cur; cur = cur->next; } if (!cur) fftw_die("invalid Rader data pointer\n"); if (prev) prev->next = d->next; else fftw_rader_top = d->next; fftw_destroy_plan_internal(d->plan); fftw_free(d->omega); fftw_free(d->cdesc); fftw_free(d); } } } static void destroy_tree(fftw_plan_node *p) { if (p) { --p->refcnt; if (p->refcnt == 0) { switch (p->type) { case FFTW_NOTW: case FFTW_REAL2HC: case FFTW_HC2REAL: break; case FFTW_TWIDDLE: if (p->nodeu.twiddle.tw) fftw_destroy_twiddle(p->nodeu.twiddle.tw); destroy_tree(p->nodeu.twiddle.recurse); break; case FFTW_HC2HC: if (p->nodeu.hc2hc.tw) fftw_destroy_twiddle(p->nodeu.hc2hc.tw); destroy_tree(p->nodeu.hc2hc.recurse); break; case FFTW_GENERIC: if (p->nodeu.generic.tw) fftw_destroy_twiddle(p->nodeu.generic.tw); destroy_tree(p->nodeu.generic.recurse); break; case FFTW_RADER: if (p->nodeu.rader.tw) fftw_destroy_twiddle(p->nodeu.rader.tw); if (p->nodeu.rader.rader_data) fftw_destroy_rader(p->nodeu.rader.rader_data); destroy_tree(p->nodeu.rader.recurse); break; case FFTW_RGENERIC: if (p->nodeu.rgeneric.tw) fftw_destroy_twiddle(p->nodeu.rgeneric.tw); destroy_tree(p->nodeu.rgeneric.recurse); break; } fftw_free(p); fftw_node_cnt--; } } } /* create a plan with twiddle factors, and other bells and whistles */ fftw_plan fftw_make_plan(int n, fftw_direction dir, fftw_plan_node *root, int flags, enum fftw_node_type wisdom_type, int wisdom_signature, fftw_recurse_kind recurse_kind, int vector_size) { fftw_plan p = (fftw_plan) fftw_malloc(sizeof(struct fftw_plan_struct)); p->n = n; p->dir = dir; p->flags = flags; fftw_use_node(root); p->root = root; p->cost = 0.0; p->wisdom_type = wisdom_type; p->wisdom_signature = wisdom_signature; p->recurse_kind = recurse_kind; p->vector_size = vector_size; if (recurse_kind == FFTW_VECTOR_RECURSE && vector_size > 1) fftw_die("invalid vector-recurse plan attempted\n"); p->next = (fftw_plan) 0; p->refcnt = 0; fftw_plan_cnt++; return p; } /* * complete with twiddle factors (because nodes don't have * them when FFTW_ESTIMATE is set) */ void fftw_complete_twiddle(fftw_plan_node *p, int n) { int r; switch (p->type) { case FFTW_NOTW: case FFTW_REAL2HC: case FFTW_HC2REAL: break; case FFTW_TWIDDLE: r = p->nodeu.twiddle.size; if (!p->nodeu.twiddle.tw) p->nodeu.twiddle.tw = fftw_create_twiddle(n, p->nodeu.twiddle.codelet_desc); fftw_complete_twiddle(p->nodeu.twiddle.recurse, n / r); break; case FFTW_HC2HC: r = p->nodeu.hc2hc.size; if (!p->nodeu.hc2hc.tw) p->nodeu.hc2hc.tw = fftw_create_twiddle(n, p->nodeu.hc2hc.codelet_desc); fftw_complete_twiddle(p->nodeu.hc2hc.recurse, n / r); break; case FFTW_GENERIC: r = p->nodeu.generic.size; if (!p->nodeu.generic.tw) p->nodeu.generic.tw = fftw_create_twiddle(n, (const fftw_codelet_desc *) 0); fftw_complete_twiddle(p->nodeu.generic.recurse, n / r); break; case FFTW_RADER: r = p->nodeu.rader.size; if (!p->nodeu.rader.tw) p->nodeu.rader.tw = fftw_create_twiddle(n, p->nodeu.rader.rader_data->cdesc); fftw_complete_twiddle(p->nodeu.rader.recurse, n / r); break; case FFTW_RGENERIC: r = p->nodeu.rgeneric.size; if (!p->nodeu.rgeneric.tw) p->nodeu.rgeneric.tw = fftw_create_twiddle(n, (const fftw_codelet_desc *) 0); fftw_complete_twiddle(p->nodeu.rgeneric.recurse, n / r); break; } } void fftw_use_plan(fftw_plan p) { ++p->refcnt; } void fftw_destroy_plan_internal(fftw_plan p) { --p->refcnt; if (p->refcnt == 0) { destroy_tree(p->root); fftw_plan_cnt--; fftw_free(p); } } /* end of constructors */ /* management of plan tables */ void fftw_make_empty_table(fftw_plan *table) { *table = (fftw_plan) 0; } void fftw_insert(fftw_plan *table, fftw_plan this_plan) { fftw_use_plan(this_plan); this_plan->next = *table; *table = this_plan; } fftw_plan fftw_lookup(fftw_plan *table, int n, int flags, int vector_size) { fftw_plan p; for (p = *table; p && (p->n != n || p->flags != flags || p->vector_size != vector_size); p = p->next); return p; } void fftw_destroy_table(fftw_plan *table) { fftw_plan p, q; for (p = *table; p; p = q) { q = p->next; fftw_destroy_plan_internal(p); } } double fftw_estimate_node(fftw_plan_node *p) { int k; switch (p->type) { case FFTW_NOTW: k = p->nodeu.notw.size; goto common1; case FFTW_REAL2HC: k = p->nodeu.real2hc.size; goto common1; case FFTW_HC2REAL: k = p->nodeu.hc2real.size; common1: return 1.0 + 0.1 * (k - NOTW_OPTIMAL_SIZE) * (k - NOTW_OPTIMAL_SIZE); case FFTW_TWIDDLE: k = p->nodeu.twiddle.size; return 1.0 + 0.1 * (k - TWIDDLE_OPTIMAL_SIZE) * (k - TWIDDLE_OPTIMAL_SIZE) + fftw_estimate_node(p->nodeu.twiddle.recurse); case FFTW_HC2HC: k = p->nodeu.hc2hc.size; return 1.0 + 0.1 * (k - TWIDDLE_OPTIMAL_SIZE) * (k - TWIDDLE_OPTIMAL_SIZE) + fftw_estimate_node(p->nodeu.hc2hc.recurse); case FFTW_GENERIC: k = p->nodeu.generic.size; return 10.0 + k * k + fftw_estimate_node(p->nodeu.generic.recurse); case FFTW_RADER: k = p->nodeu.rader.size; return 10.0 + 10 * k + fftw_estimate_node(p->nodeu.rader.recurse); case FFTW_RGENERIC: k = p->nodeu.rgeneric.size; return 10.0 + k * k + fftw_estimate_node(p->nodeu.rgeneric.recurse); } return 1.0E20; } /* pick the better of two plans and destroy the other one. */ fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2) { if (!p1) return p2; if (!p2) return p1; if (p1->cost > p2->cost) { fftw_destroy_plan_internal(p1); return p2; } else { fftw_destroy_plan_internal(p2); return p1; } } /* find the smallest prime factor of n */ int fftw_factor(int n) { int r; /* try 2 */ if ((n & 1) == 0) return 2; /* try odd numbers up to sqrt(n) */ for (r = 3; r * r <= n; r += 2) if (n % r == 0) return r; /* n is prime */ return n; } static void print_node(FILE *f, fftw_plan_node *p, int indent) { if (p) { switch (p->type) { case FFTW_NOTW: fprintf(f, "%*sFFTW_NOTW %d\n", indent, "", p->nodeu.notw.size); break; case FFTW_REAL2HC: fprintf(f, "%*sFFTW_REAL2HC %d\n", indent, "", p->nodeu.real2hc.size); break; case FFTW_HC2REAL: fprintf(f, "%*sFFTW_HC2REAL %d\n", indent, "", p->nodeu.hc2real.size); break; case FFTW_TWIDDLE: fprintf(f, "%*sFFTW_TWIDDLE %d\n", indent, "", p->nodeu.twiddle.size); print_node(f, p->nodeu.twiddle.recurse, indent); break; case FFTW_HC2HC: fprintf(f, "%*sFFTW_HC2HC %d\n", indent, "", p->nodeu.hc2hc.size); print_node(f, p->nodeu.hc2hc.recurse, indent); break; case FFTW_GENERIC: fprintf(f, "%*sFFTW_GENERIC %d\n", indent, "", p->nodeu.generic.size); print_node(f, p->nodeu.generic.recurse, indent); break; case FFTW_RADER: fprintf(f, "%*sFFTW_RADER %d\n", indent, "", p->nodeu.rader.size); fprintf(f, "%*splan for size %d convolution:\n", indent + 4, "", p->nodeu.rader.size - 1); print_node(f, p->nodeu.rader.rader_data->plan->root, indent + 6); print_node(f, p->nodeu.rader.recurse, indent); break; case FFTW_RGENERIC: fprintf(f, "%*sFFTW_RGENERIC %d\n", indent, "", p->nodeu.rgeneric.size); print_node(f, p->nodeu.rgeneric.recurse, indent); break; } } } void fftw_fprint_plan(FILE *f, fftw_plan p) { fprintf(f, "plan: (cost = %e)\n", p->cost); if (p->recurse_kind == FFTW_VECTOR_RECURSE) fprintf(f, "(vector recursion)\n"); else if (p->vector_size > 1) fprintf(f, "(vector-size %d)\n", p->vector_size); print_node(f, p->root, 0); } void fftw_print_plan(fftw_plan p) { fftw_fprint_plan(stdout, p); } size_t fftw_sizeof_fftw_real(void) { return(sizeof(fftw_real)); }