NFFT Logo 3.2.3
nfsft.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id: nfsft.c 3896 2012-10-10 12:19:26Z tovo $ */
20 
27 #include "config.h"
28 
29 /* Include standard C headers. */
30 #include <math.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #ifdef HAVE_COMPLEX_H
34 #include <complex.h>
35 #endif
36 
37 #ifdef _OPENMP
38 #include <omp.h>
39 #endif
40 
41 /* Include NFFT3 utilities header. */
42 #include "nfft3util.h"
43 
44 /* Include NFFT3 library header. */
45 #include "nfft3.h"
46 
47 #include "infft.h"
48 
49 /* Include private associated Legendre functions header. */
50 #include "legendre.h"
51 
52 /* Include private API header. */
53 #include "api.h"
54 
55 
65 #define NFSFT_DEFAULT_NFFT_CUTOFF 6
66 
72 #define NFSFT_DEFAULT_THRESHOLD 1000
73 
79 #define NFSFT_BREAK_EVEN 5
80 
87 static struct nfsft_wisdom wisdom = {false,0U,-1,-1,0,0,0,0,0};
88 
111 static inline void c2e(nfsft_plan *plan)
112 {
113  int k;
114  int n;
115  double _Complex last;
116  double _Complex act;
117  double _Complex *xp;
118  double _Complex *xm;
119  int low;
120  int up;
121  int lowe;
122  int upe;
124  /* Set the first row to order to zero since it is unused. */
125  memset(plan->f_hat_intern,0U,(2*plan->N+2)*sizeof(double _Complex));
126 
127  /* Determine lower and upper bounds for loop processing even terms. */
128  lowe = -plan->N + (plan->N%2);
129  upe = -lowe;
130 
131  /* Process even terms. */
132  for (n = lowe; n <= upe; n += 2)
133  {
134  /* Compute new coefficients \f$\left(c_k^n\right)_{k=-M,\ldots,M}\f$ from
135  * old coefficients $\left(b_k^n\right)_{k=0,\ldots,M}$. */
136  xm = &(plan->f_hat_intern[NFSFT_INDEX(-1,n,plan)]);
137  xp = &(plan->f_hat_intern[NFSFT_INDEX(+1,n,plan)]);
138  for(k = 1; k <= plan->N; k++)
139  {
140  *xp *= 0.5;
141  *xm-- = *xp++;
142  }
143  /* Set the first coefficient in the array corresponding to this order to
144  * zero since it is unused. */
145  *xm = 0.0;
146  }
147 
148  /* Determine lower and upper bounds for loop processing odd terms. */
149  low = -plan->N + (1-plan->N%2);
150  up = -low;
151 
152  /* Process odd terms incorporating the additional sine term
153  * \f$\sin \vartheta\f$. */
154  for (n = low; n <= up; n += 2)
155  {
156  /* Compute new coefficients \f$\left(c_k^n\right)_{k=-M,\ldots,M}\f$ from
157  * old coefficients $\left(b_k^n\right)_{k=0,\ldots,M-1}$ incorporating
158  * the additional term \f$\sin \vartheta\f$. */
159  plan->f_hat_intern[NFSFT_INDEX(0,n,plan)] *= 2.0;
160  xp = &(plan->f_hat_intern[NFSFT_INDEX(-plan->N-1,n,plan)]);
161  /* Set the first coefficient in the array corresponding to this order to zero
162  * since it is unused. */
163  *xp++ = 0.0;
164  xm = &(plan->f_hat_intern[NFSFT_INDEX(plan->N,n,plan)]);
165  last = *xm;
166  *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
167  *xp++ = -(*xm--);
168  for (k = plan->N-1; k > 0; k--)
169  {
170  act = *xm;
171  *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
172  *xp++ = -(*xm--);
173  last = act;
174  }
175  *xm = 0.0;
176  }
177 }
178 
189 static inline void c2e_transposed(nfsft_plan *plan)
190 {
191  int k;
192  int n;
193  double _Complex last;
194  double _Complex act;
195  double _Complex *xp;
196  double _Complex *xm;
197  int low;
198  int up;
199  int lowe;
200  int upe;
202  /* Determine lower and upper bounds for loop processing even terms. */
203  lowe = -plan->N + (plan->N%2);
204  upe = -lowe;
205 
206  /* Process even terms. */
207  for (n = lowe; n <= upe; n += 2)
208  {
209  /* Compute new coefficients \f$\left(b_k^n\right)_{k=0,\ldots,M}\f$ from
210  * old coefficients $\left(c_k^n\right)_{k=-M,\ldots,M}$. */
211  xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
212  xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
213  for(k = 1; k <= plan->N; k++)
214  {
215  *xp += *xm--;
216  *xp++ *= 0.5;;
217  }
218  }
219 
220  /* Determine lower and upper bounds for loop processing odd terms. */
221  low = -plan->N + (1-plan->N%2);
222  up = -low;
223 
224  /* Process odd terms. */
225  for (n = low; n <= up; n += 2)
226  {
227  /* Compute new coefficients \f$\left(b_k^n\right)_{k=0,\ldots,M-1}\f$ from
228  * old coefficients $\left(c_k^n\right)_{k=0,\ldots,M-1}$. */
229  xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
230  xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
231  for(k = 1; k <= plan->N; k++)
232  {
233  *xp++ -= *xm--;
234  }
235 
236  plan->f_hat[NFSFT_INDEX(0,n,plan)] =
237  -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(1,n,plan)];
238  last = plan->f_hat[NFSFT_INDEX(1,n,plan)];
239  plan->f_hat[NFSFT_INDEX(1,n,plan)] =
240  -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(2,n,plan)];
241 
242  xp = &(plan->f_hat[NFSFT_INDEX(2,n,plan)]);
243  for (k = 2; k < plan->N; k++)
244  {
245  act = *xp;
246  *xp = -0.25 * _Complex_I * (xp[1] - last);
247  xp++;
248  last = act;
249  }
250  *xp = 0.25 * _Complex_I * last;
251 
252  plan->f_hat[NFSFT_INDEX(0,n,plan)] *= 2.0;
253  }
254 }
255 
256 void nfsft_init(nfsft_plan *plan, int N, int M)
257 {
258  /* Call nfsft_init_advanced with default flags. */
259  nfsft_init_advanced(plan, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
260  NFSFT_MALLOC_F_HAT);
261 }
262 
263 void nfsft_init_advanced(nfsft_plan* plan, int N, int M,
264  unsigned int flags)
265 {
266  /* Call nfsft_init_guru with the flags and default NFFT cut-off. */
267  nfsft_init_guru(plan, N, M, flags, PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
268  FFT_OUT_OF_PLACE, NFSFT_DEFAULT_NFFT_CUTOFF);
269 }
270 
271 void nfsft_init_guru(nfsft_plan *plan, int N, int M, unsigned int flags,
272  unsigned int nfft_flags, int nfft_cutoff)
273 {
274  int *nfft_size; /*< NFFT size */
275  int *fftw_size; /*< FFTW size */
276 
277  /* Save the flags in the plan. */
278  plan->flags = flags;
279 
280  /* Save the bandwidth N and the number of samples M in the plan. */
281  plan->N = N;
282  plan->M_total = M;
283 
284  /* Calculate the next greater power of two with respect to the bandwidth N
285  * and the corresponding exponent. */
286  //next_power_of_2_exp(plan->N,&plan->NPT,&plan->t);
287 
288  /* Save length of array of Fourier coefficients. Owing to the data layout the
289  * length is (2N+2)(2N+2) */
290  plan->N_total = (2*plan->N+2)*(2*plan->N+2);
291 
292  /* Allocate memory for auxilliary array of spherical Fourier coefficients,
293  * if neccesary. */
294  if (plan->flags & NFSFT_PRESERVE_F_HAT)
295  {
296  plan->f_hat_intern = (double _Complex*) nfft_malloc(plan->N_total*
297  sizeof(double _Complex));
298  }
299 
300  /* Allocate memory for spherical Fourier coefficients, if neccesary. */
301  if (plan->flags & NFSFT_MALLOC_F_HAT)
302  {
303  plan->f_hat = (double _Complex*) nfft_malloc(plan->N_total*
304  sizeof(double _Complex));
305  }
306 
307  /* Allocate memory for samples, if neccesary. */
308  if (plan->flags & NFSFT_MALLOC_F)
309  {
310  plan->f = (double _Complex*) nfft_malloc(plan->M_total*sizeof(double _Complex));
311  }
312 
313  /* Allocate memory for nodes, if neccesary. */
314  if (plan->flags & NFSFT_MALLOC_X)
315  {
316  plan->x = (double*) nfft_malloc(plan->M_total*2*sizeof(double));
317  }
318 
319  /* Check if fast algorithm is activated. */
320  if (plan->flags & NFSFT_NO_FAST_ALGORITHM)
321  {
322  }
323  else
324  {
325  nfft_size = (int*)nfft_malloc(2*sizeof(int));
326  fftw_size = (int*)nfft_malloc(2*sizeof(int));
327 
329  nfft_size[0] = 2*plan->N+2;
330  nfft_size[1] = 2*plan->N+2;
331  fftw_size[0] = 4*plan->N;
332  fftw_size[1] = 4*plan->N;
333 
335  nfft_init_guru(&plan->plan_nfft, 2, nfft_size, plan->M_total, fftw_size,
336  nfft_cutoff, nfft_flags,
337  FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
338 
339  /* Assign angle array. */
340  plan->plan_nfft.x = plan->x;
341  /* Assign result array. */
342  plan->plan_nfft.f = plan->f;
343  /* Assign Fourier coefficients array. */
344  plan->plan_nfft.f_hat = plan->f_hat;
345 
348  /* Precompute. */
349  //nfft_precompute_one_psi(&plan->plan_nfft);
350 
351  /* Free auxilliary arrays. */
352  nfft_free(nfft_size);
353  nfft_free(fftw_size);
354  }
355 
356  plan->mv_trafo = (void (*) (void* ))nfsft_trafo;
357  plan->mv_adjoint = (void (*) (void* ))nfsft_adjoint;
358 }
359 
360 void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags,
361  unsigned int fpt_flags)
362 {
363  int n; /*< The order n */
364 
365  /* Check if already initialized. */
366  if (wisdom.initialized == true)
367  {
368  return;
369  }
370 
371 #ifdef _OPENMP
372  #pragma omp parallel default(shared)
373  {
374  int nthreads = omp_get_num_threads();
375  int threadid = omp_get_thread_num();
376  #pragma omp single
377  {
378  wisdom.nthreads = nthreads;
379  }
380  }
381 #endif
382 
383  /* Save the precomputation flags. */
384  wisdom.flags = nfsft_flags;
385 
386  /* Compute and save N_max = 2^{\ceil{log_2 N}} as next greater
387  * power of two with respect to N. */
388  X(next_power_of_2_exp)(N,&wisdom.N_MAX,&wisdom.T_MAX);
389 
390  /* Check, if precomputation for direct algorithms needs to be performed. */
391  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
392  {
393  wisdom.alpha = NULL;
394  wisdom.beta = NULL;
395  wisdom.gamma = NULL;
396  }
397  else
398  {
399  /* Allocate memory for three-term recursion coefficients. */
400  wisdom.alpha = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
401  sizeof(double));
402  wisdom.beta = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
403  sizeof(double));
404  wisdom.gamma = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
405  sizeof(double));
407  /* Compute three-term recurrence coefficients alpha_k^n, beta_k^n, and
408  * gamma_k^n. */
409  alpha_al_all(wisdom.alpha,wisdom.N_MAX);
410  beta_al_all(wisdom.beta,wisdom.N_MAX);
411  gamma_al_all(wisdom.gamma,wisdom.N_MAX);
412  }
413 
414  /* Check, if precomputation for fast algorithms needs to be performed. */
415  if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
416  {
417  }
418  else if (wisdom.N_MAX >= NFSFT_BREAK_EVEN)
419  {
420  /* Precompute data for DPT/FPT. */
421 
422  /* Check, if recursion coefficients have already been calculated. */
423  if (wisdom.alpha != NULL)
424  {
425 #ifdef _OPENMP
426  #pragma omp parallel default(shared) private(n)
427  {
428  int nthreads = omp_get_num_threads();
429  int threadid = omp_get_thread_num();
430  #pragma omp single
431  {
432  wisdom.nthreads = nthreads;
433  wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*sizeof(fpt_set));
434  }
435 
436  wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
437  fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
438  for (n = 0; n <= wisdom.N_MAX; n++)
439  fpt_precompute(wisdom.set_threads[threadid],n,&wisdom.alpha[ROW(n)],
440  &wisdom.beta[ROW(n)], &wisdom.gamma[ROW(n)],n,kappa);
441  }
442 
443 #else
444  /* Use the recursion coefficients to precompute FPT data using persistent
445  * arrays. */
446  wisdom.set = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
447  fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
448  for (n = 0; n <= wisdom.N_MAX; n++)
449  {
450  /*fprintf(stderr,"%d\n",n);
451  fflush(stderr);*/
452  /* Precompute data for FPT transformation for order n. */
453  fpt_precompute(wisdom.set,n,&wisdom.alpha[ROW(n)],&wisdom.beta[ROW(n)],
454  &wisdom.gamma[ROW(n)],n,kappa);
455  }
456 #endif
457  }
458  else
459  {
460 #ifdef _OPENMP
461  #pragma omp parallel default(shared) private(n)
462  {
463  double *alpha, *beta, *gamma;
464  int nthreads = omp_get_num_threads();
465  int threadid = omp_get_thread_num();
466  #pragma omp single
467  {
468  wisdom.nthreads = nthreads;
469  wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*sizeof(fpt_set));
470  }
471 
472  alpha = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
473  beta = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
474  gamma = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
475  wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
476  fpt_flags | FPT_AL_SYMMETRY);
477 
478  for (n = 0; n <= wisdom.N_MAX; n++)
479  {
480  alpha_al_row(alpha,wisdom.N_MAX,n);
481  beta_al_row(beta,wisdom.N_MAX,n);
482  gamma_al_row(gamma,wisdom.N_MAX,n);
483 
484  /* Precompute data for FPT transformation for order n. */
485  fpt_precompute(wisdom.set_threads[threadid],n,alpha,beta,gamma,n,
486  kappa);
487  }
488  /* Free auxilliary arrays. */
489  nfft_free(alpha);
490  nfft_free(beta);
491  nfft_free(gamma);
492  }
493 #else
494  /* Allocate memory for three-term recursion coefficients. */
495  wisdom.alpha = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
496  wisdom.beta = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
497  wisdom.gamma = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
498  wisdom.set = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
499  fpt_flags | FPT_AL_SYMMETRY);
500  for (n = 0; n <= wisdom.N_MAX; n++)
501  {
502  /*fprintf(stderr,"%d NO_DIRECT\n",n);
503  fflush(stderr);*/
504  /* Compute three-term recurrence coefficients alpha_k^n, beta_k^n, and
505  * gamma_k^n. */
506  alpha_al_row(wisdom.alpha,wisdom.N_MAX,n);
507  beta_al_row(wisdom.beta,wisdom.N_MAX,n);
508  gamma_al_row(wisdom.gamma,wisdom.N_MAX,n);
509 
510  /* Precompute data for FPT transformation for order n. */
511  fpt_precompute(wisdom.set,n,wisdom.alpha,wisdom.beta,wisdom.gamma,n,
512  kappa);
513  }
514  /* Free auxilliary arrays. */
515  nfft_free(wisdom.alpha);
516  nfft_free(wisdom.beta);
517  nfft_free(wisdom.gamma);
518 #endif
519  wisdom.alpha = NULL;
520  wisdom.beta = NULL;
521  wisdom.gamma = NULL;
522  }
523  }
524 
525  /* Wisdom has been initialised. */
526  wisdom.initialized = true;
527 }
528 
529 void nfsft_forget(void)
530 {
531  /* Check if wisdom has been initialised. */
532  if (wisdom.initialized == false)
533  {
534  /* Nothing to do. */
535  return;
536  }
537 
538  /* Check, if precomputation for direct algorithms has been performed. */
539  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
540  {
541  }
542  else
543  {
544  /* Free arrays holding three-term recurrence coefficients. */
545  nfft_free(wisdom.alpha);
546  nfft_free(wisdom.beta);
547  nfft_free(wisdom.gamma);
548  wisdom.alpha = NULL;
549  wisdom.beta = NULL;
550  wisdom.gamma = NULL;
551  }
552 
553  /* Check, if precomputation for fast algorithms has been performed. */
554  if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
555  {
556  }
557  else if (wisdom.N_MAX >= NFSFT_BREAK_EVEN)
558  {
559 #ifdef _OPENMP
560  int k;
561  for (k = 0; k < wisdom.nthreads; k++)
562  fpt_finalize(wisdom.set_threads[k]);
563  nfft_free(wisdom.set_threads);
564 #else
565  /* Free precomputed data for FPT transformation. */
566  fpt_finalize(wisdom.set);
567 #endif
568  }
569 
570  /* Wisdom is now uninitialised. */
571  wisdom.initialized = false;
572 }
573 
574 
575 void nfsft_finalize(nfsft_plan *plan)
576 {
577  if (!plan)
578  return;
579 
580  /* Finalise the nfft plan. */
581  nfft_finalize(&plan->plan_nfft);
582 
583  /* De-allocate memory for auxilliary array of spherical Fourier coefficients,
584  * if neccesary. */
585  if (plan->flags & NFSFT_PRESERVE_F_HAT)
586  {
587  nfft_free(plan->f_hat_intern);
588  }
589 
590  /* De-allocate memory for spherical Fourier coefficients, if necessary. */
591  if (plan->flags & NFSFT_MALLOC_F_HAT)
592  {
593  //fprintf(stderr,"deallocating f_hat\n");
594  nfft_free(plan->f_hat);
595  }
596 
597  /* De-allocate memory for samples, if neccesary. */
598  if (plan->flags & NFSFT_MALLOC_F)
599  {
600  //fprintf(stderr,"deallocating f\n");
601  nfft_free(plan->f);
602  }
603 
604  /* De-allocate memory for nodes, if neccesary. */
605  if (plan->flags & NFSFT_MALLOC_X)
606  {
607  //fprintf(stderr,"deallocating x\n");
608  nfft_free(plan->x);
609  }
610 }
611 
612 void nfsft_trafo_direct(nfsft_plan *plan)
613 {
614  int m; /*< The node index */
615  int k; /*< The degree k */
616  int n; /*< The order n */
617  int n_abs; /*< The absolute value of the order n, ie n_abs = |n| */
618  double *alpha; /*< Pointer to current three-term recurrence
619  coefficient alpha_k^n for associated Legendre
620  functions P_k^n */
621  double *gamma; /*< Pointer to current three-term recurrence
622  coefficient beta_k^n for associated Legendre
623  functions P_k^n */
624  double _Complex *a; /*< Pointer to auxilliary array for Clenshaw algor. */
625  double _Complex it1; /*< Auxilliary variable for Clenshaw algorithm */
626  double _Complex it2; /*< Auxilliary variable for Clenshaw algorithm */
627  double _Complex temp; /*< Auxilliary variable for Clenshaw algorithm */
628  double _Complex f_m; /*< The final function value f_m = f(x_m) for a
629  single node. */
630  double stheta; /*< Current angle theta for Clenshaw algorithm */
631  double sphi; /*< Current angle phi for Clenshaw algorithm */
632 
633 #ifdef MEASURE_TIME
634  plan->MEASURE_TIME_t[0] = 0.0;
635  plan->MEASURE_TIME_t[1] = 0.0;
636  plan->MEASURE_TIME_t[2] = 0.0;
637 #endif
638 
639  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
640  {
641  return;
642  }
643 
644  /* Copy spherical Fourier coefficients, if necessary. */
645  if (plan->flags & NFSFT_PRESERVE_F_HAT)
646  {
647  memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
648  sizeof(double _Complex));
649  }
650  else
651  {
652  plan->f_hat_intern = plan->f_hat;
653  }
654 
655  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
656  * multiply spherical Fourier coefficients with corresponding normalization
657  * weight. */
658  if (plan->flags & NFSFT_NORMALIZED)
659  {
660  /* Traverse Fourier coefficients array. */
661  #pragma omp parallel for default(shared) private(k,n)
662  for (k = 0; k <= plan->N; k++)
663  {
664  for (n = -k; n <= k; n++)
665  {
666  /* Multiply with normalization weight. */
667  plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
668  sqrt((2*k+1)/(4.0*PI));
669  }
670  }
671  }
672 
673  /* Distinguish by bandwidth M. */
674  if (plan->N == 0)
675  {
676  /* N = 0 */
677 
678  /* Constant function */
679  for (m = 0; m < plan->M_total; m++)
680  {
681  plan->f[m] = plan->f_hat_intern[NFSFT_INDEX(0,0,plan)];
682  }
683  }
684  else
685  {
686  /* N > 0 */
687 
688  /* Evaluate
689  * \sum_{k=0}^N \sum_{n=-k}^k a_k^n P_k^{|n|}(cos theta_m) e^{i n phi_m}
690  * = \sum_{n=-N}^N \sum_{k=|n|}^N a_k^n P_k^{|n|}(cos theta_m)
691  * e^{i n phi_m}.
692  */
693  #pragma omp parallel for default(shared) private(m,stheta,sphi,f_m,n,a,n_abs,alpha,gamma,it2,it1,k,temp)
694  for (m = 0; m < plan->M_total; m++)
695  {
696  /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
697  stheta = cos(2.0*PI*plan->x[2*m+1]);
698  /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
699  sphi = 2.0*PI*plan->x[2*m];
700 
701  /* Initialize result for current node. */
702  f_m = 0.0;
703 
704  /* For n = -N,...,N, evaluate
705  * b_n := \sum_{k=|n|}^N a_k^n P_k^{|n|}(cos theta_m)
706  * using Clenshaw's algorithm.
707  */
708  for (n = -plan->N; n <= plan->N; n++)
709  {
710  /* Get pointer to Fourier coefficients vector for current order n. */
711  a = &(plan->f_hat_intern[NFSFT_INDEX(0,n,plan)]);
712 
713  /* Take absolute value of n. */
714  n_abs = abs(n);
715 
716  /* Get pointers to three-term recurrence coefficients arrays. */
717  alpha = &(wisdom.alpha[ROW(n_abs)]);
718  gamma = &(wisdom.gamma[ROW(n_abs)]);
719 
720  /* Clenshaw's algorithm */
721  it2 = a[plan->N];
722  it1 = a[plan->N-1];
723  for (k = plan->N; k > n_abs + 1; k--)
724  {
725  temp = a[k-2] + it2 * gamma[k];
726  it2 = it1 + it2 * alpha[k] * stheta;
727  it1 = temp;
728  }
729 
730  /* Compute final step if neccesary. */
731  if (n_abs < plan->N)
732  {
733  it2 = it1 + it2 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
734  }
735 
736  /* Compute final result by multiplying the fixed part
737  * gamma_|n| (1-cos^2(theta))^{|n|/2}
738  * for order n and the exponential part
739  * e^{i n phi}
740  * and add the result to f_m.
741  */
742  f_m += it2 * wisdom.gamma[ROW(n_abs)] *
743  pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
744  }
745 
746  /* Write result f_m for current node to array f. */
747  plan->f[m] = f_m;
748  }
749  }
750 }
751 
752 void nfsft_adjoint_direct(nfsft_plan *plan)
753 {
754  int m; /*< The node index */
755  int k; /*< The degree k */
756  int n; /*< The order n */
757  int n_abs; /*< The absolute value of the order n, ie n_abs = |n| */
758  double *alpha; /*< Pointer to current three-term recurrence
759  coefficient alpha_k^n for associated Legendre
760  functions P_k^n */
761  double *gamma; /*< Pointer to current three-term recurrence
762  coefficient beta_k^n for associated Legendre
763  functions P_k^n */
764  double _Complex it1; /*< Auxilliary variable for Clenshaw algorithm */
765  double _Complex it2; /*< Auxilliary variable for Clenshaw algorithm */
766  double _Complex temp; /*< Auxilliary variable for Clenshaw algorithm */
767  double stheta; /*< Current angle theta for Clenshaw algorithm */
768  double sphi; /*< Current angle phi for Clenshaw algorithm */
769 
770 #ifdef MEASURE_TIME
771  plan->MEASURE_TIME_t[0] = 0.0;
772  plan->MEASURE_TIME_t[1] = 0.0;
773  plan->MEASURE_TIME_t[2] = 0.0;
774 #endif
775 
776  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
777  {
778  return;
779  }
780 
781  /* Initialise spherical Fourier coefficients array with zeros. */
782  memset(plan->f_hat,0U,plan->N_total*sizeof(double _Complex));
783 
784  /* Distinguish by bandwidth N. */
785  if (plan->N == 0)
786  {
787  /* N == 0 */
788 
789  /* Constant function */
790  for (m = 0; m < plan->M_total; m++)
791  {
792  plan->f_hat[NFSFT_INDEX(0,0,plan)] += plan->f[m];
793  }
794  }
795  else
796  {
797  /* N > 0 */
798 
799 #ifdef _OPENMP
800  /* Traverse all orders n. */
801  #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,stheta,sphi,it2,it1,k,temp)
802  for (n = -plan->N; n <= plan->N; n++)
803  {
804  /* Take absolute value of n. */
805  n_abs = abs(n);
806 
807  /* Get pointers to three-term recurrence coefficients arrays. */
808  alpha = &(wisdom.alpha[ROW(n_abs)]);
809  gamma = &(wisdom.gamma[ROW(n_abs)]);
810 
811  /* Traverse all nodes. */
812  for (m = 0; m < plan->M_total; m++)
813  {
814  /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
815  stheta = cos(2.0*PI*plan->x[2*m+1]);
816  /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
817  sphi = 2.0*PI*plan->x[2*m];
818 
819  /* Transposed Clenshaw algorithm */
820 
821  /* Initial step */
822  it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
823  pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
824  plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
825  it2 = 0.0;
826 
827  if (n_abs < plan->N)
828  {
829  it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
830  plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
831  }
832 
833  /* Loop for transposed Clenshaw algorithm */
834  for (k = n_abs+2; k <= plan->N; k++)
835  {
836  temp = it2;
837  it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
838  it1 = temp;
839  plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
840  }
841  }
842  }
843 #else
844  /* Traverse all nodes. */
845  for (m = 0; m < plan->M_total; m++)
846  {
847  /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
848  stheta = cos(2.0*PI*plan->x[2*m+1]);
849  /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
850  sphi = 2.0*PI*plan->x[2*m];
851 
852  /* Traverse all orders n. */
853  for (n = -plan->N; n <= plan->N; n++)
854  {
855  /* Take absolute value of n. */
856  n_abs = abs(n);
857 
858  /* Get pointers to three-term recurrence coefficients arrays. */
859  alpha = &(wisdom.alpha[ROW(n_abs)]);
860  gamma = &(wisdom.gamma[ROW(n_abs)]);
861 
862  /* Transposed Clenshaw algorithm */
863 
864  /* Initial step */
865  it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
866  pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
867  plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
868  it2 = 0.0;
869 
870  if (n_abs < plan->N)
871  {
872  it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
873  plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
874  }
875 
876  /* Loop for transposed Clenshaw algorithm */
877  for (k = n_abs+2; k <= plan->N; k++)
878  {
879  temp = it2;
880  it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
881  it1 = temp;
882  plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
883  }
884  }
885  }
886 #endif
887  }
888 
889  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
890  * multiply spherical Fourier coefficients with corresponding normalization
891  * weight. */
892  if (plan->flags & NFSFT_NORMALIZED)
893  {
894  /* Traverse Fourier coefficients array. */
895  #pragma omp parallel for default(shared) private(k,n)
896  for (k = 0; k <= plan->N; k++)
897  {
898  for (n = -k; n <= k; n++)
899  {
900  /* Multiply with normalization weight. */
901  plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
902  sqrt((2*k+1)/(4.0*PI));
903  }
904  }
905  }
906 
907  /* Set unused coefficients to zero. */
908  if (plan->flags & NFSFT_ZERO_F_HAT)
909  {
910  for (n = -plan->N; n <= plan->N+1; n++)
911  {
912  memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
913  (plan->N+1+abs(n))*sizeof(double _Complex));
914  }
915  }
916 }
917 
918 void nfsft_trafo(nfsft_plan *plan)
919 {
920  int k; /*< The degree k */
921  int n; /*< The order n */
922 #ifdef MEASURE_TIME
923  ticks t0, t1;
924 #endif
925  #ifdef DEBUG
926  double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
927  t_pre = 0.0;
928  t_norm = 0.0;
929  t_fpt = 0.0;
930  t_c2e = 0.0;
931  t_nfft = 0.0;
932  #endif
933 
934 #ifdef MEASURE_TIME
935  plan->MEASURE_TIME_t[0] = 0.0;
936  plan->MEASURE_TIME_t[1] = 0.0;
937  plan->MEASURE_TIME_t[2] = 0.0;
938 #endif
939 
940  if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
941  {
942  return;
943  }
944 
945  /* Check, if precomputation was done and that the bandwidth N is not too
946  * big.
947  */
948  if (wisdom.initialized == 0 || plan->N > wisdom.N_MAX)
949  {
950  return;
951  }
952 
953  /* Check, if slow transformation should be used due to small bandwidth. */
954  if (plan->N < NFSFT_BREAK_EVEN)
955  {
956  /* Use NDSFT. */
957  nfsft_trafo_direct(plan);
958  }
959 
960  /* Check for correct value of the bandwidth N. */
961  else if (plan->N <= wisdom.N_MAX)
962  {
963  /* Copy spherical Fourier coefficients, if necessary. */
964  if (plan->flags & NFSFT_PRESERVE_F_HAT)
965  {
966  memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
967  sizeof(double _Complex));
968  }
969  else
970  {
971  plan->f_hat_intern = plan->f_hat;
972  }
973 
974  /* Propagate pointer values to the internal NFFT plan to assure
975  * consistency. Pointers may have been modified externally.
976  */
977  plan->plan_nfft.x = plan->x;
978  plan->plan_nfft.f = plan->f;
979  plan->plan_nfft.f_hat = plan->f_hat_intern;
980 
981  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
982  * multiply spherical Fourier coefficients with corresponding normalization
983  * weight. */
984  if (plan->flags & NFSFT_NORMALIZED)
985  {
986  /* Traverse Fourier coefficients array. */
987  #pragma omp parallel for default(shared) private(k,n)
988  for (k = 0; k <= plan->N; k++)
989  {
990  for (n = -k; n <= k; n++)
991  {
992  /* Multiply with normalization weight. */
993  plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
994  sqrt((2*k+1)/(4.0*PI));
995  }
996  }
997  }
998 
999 #ifdef MEASURE_TIME
1000  t0 = getticks();
1001 #endif
1002  /* Check, which polynomial transform algorithm should be used. */
1003  if (plan->flags & NFSFT_USE_DPT)
1004  {
1005 #ifdef _OPENMP
1006  #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1007  for (n = -plan->N; n <= plan->N; n++)
1008  fpt_trafo_direct(wisdom.set_threads[omp_get_thread_num()],abs(n),
1009  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1010  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1011  plan->N,0U);
1012 #else
1013  /* Use direct discrete polynomial transform DPT. */
1014  for (n = -plan->N; n <= plan->N; n++)
1015  {
1016  //fprintf(stderr,"nfsft_trafo: n = %d\n",n);
1017  //fflush(stderr);
1018  fpt_trafo_direct(wisdom.set,abs(n),
1019  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1020  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1021  plan->N,0U);
1022  }
1023 #endif
1024  }
1025  else
1026  {
1027 #ifdef _OPENMP
1028  #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1029  for (n = -plan->N; n <= plan->N; n++)
1030  fpt_trafo(wisdom.set_threads[omp_get_thread_num()],abs(n),
1031  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1032  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1033  plan->N,0U);
1034 #else
1035  /* Use fast polynomial transform FPT. */
1036  for (n = -plan->N; n <= plan->N; n++)
1037  {
1038  //fprintf(stderr,"nfsft_trafo: n = %d\n",n);
1039  //fflush(stderr);
1040  fpt_trafo(wisdom.set,abs(n),
1041  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1042  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1043  plan->N,0U);
1044  }
1045 #endif
1046  }
1047 #ifdef MEASURE_TIME
1048  t1 = getticks();
1049  plan->MEASURE_TIME_t[0] = nfft_elapsed_seconds(t1,t0);
1050 #endif
1051 
1052 #ifdef MEASURE_TIME
1053  t0 = getticks();
1054 #endif
1055  /* Convert Chebyshev coefficients to Fourier coefficients. */
1056  c2e(plan);
1057 #ifdef MEASURE_TIME
1058  t1 = getticks();
1059  plan->MEASURE_TIME_t[1] = nfft_elapsed_seconds(t1,t0);
1060 #endif
1061 
1062 #ifdef MEASURE_TIME
1063  t0 = getticks();
1064 #endif
1065  /* Check, which nonequispaced discrete Fourier transform algorithm should
1066  * be used.
1067  */
1068  if (plan->flags & NFSFT_USE_NDFT)
1069  {
1070  /* Use NDFT. */
1071  nfft_trafo_direct(&plan->plan_nfft);
1072  }
1073  else
1074  {
1075  /* Use NFFT. */
1076  //fprintf(stderr,"nfsft_adjoint: nfft_trafo\n");
1077  nfft_trafo_2d(&plan->plan_nfft);
1078  }
1079 #ifdef MEASURE_TIME
1080  t1 = getticks();
1081  plan->MEASURE_TIME_t[2] = nfft_elapsed_seconds(t1,t0);
1082 #endif
1083  }
1084 }
1085 
1086 void nfsft_adjoint(nfsft_plan *plan)
1087 {
1088  int k; /*< The degree k */
1089  int n; /*< The order n */
1090 #ifdef MEASURE_TIME
1091  ticks t0, t1;
1092 #endif
1093 
1094 #ifdef MEASURE_TIME
1095  plan->MEASURE_TIME_t[0] = 0.0;
1096  plan->MEASURE_TIME_t[1] = 0.0;
1097  plan->MEASURE_TIME_t[2] = 0.0;
1098 #endif
1099 
1100  if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
1101  {
1102  return;
1103  }
1104 
1105  /* Check, if precomputation was done and that the bandwidth N is not too
1106  * big.
1107  */
1108  if (wisdom.initialized == 0 || plan->N > wisdom.N_MAX)
1109  {
1110  return;
1111  }
1112 
1113  /* Check, if slow transformation should be used due to small bandwidth. */
1114  if (plan->N < NFSFT_BREAK_EVEN)
1115  {
1116  /* Use adjoint NDSFT. */
1117  nfsft_adjoint_direct(plan);
1118  }
1119  /* Check for correct value of the bandwidth N. */
1120  else if (plan->N <= wisdom.N_MAX)
1121  {
1122  //fprintf(stderr,"nfsft_adjoint: Starting\n");
1123  //fflush(stderr);
1124  /* Propagate pointer values to the internal NFFT plan to assure
1125  * consistency. Pointers may have been modified externally.
1126  */
1127  plan->plan_nfft.x = plan->x;
1128  plan->plan_nfft.f = plan->f;
1129  plan->plan_nfft.f_hat = plan->f_hat;
1130 
1131 #ifdef MEASURE_TIME
1132  t0 = getticks();
1133 #endif
1134  /* Check, which adjoint nonequispaced discrete Fourier transform algorithm
1135  * should be used.
1136  */
1137  if (plan->flags & NFSFT_USE_NDFT)
1138  {
1139  //fprintf(stderr,"nfsft_adjoint: Executing nfft_adjoint_direct\n");
1140  //fflush(stderr);
1141  /* Use adjoint NDFT. */
1142  nfft_adjoint_direct(&plan->plan_nfft);
1143  }
1144  else
1145  {
1146  //fprintf(stderr,"nfsft_adjoint: Executing nfft_adjoint\n");
1147  //fflush(stderr);
1148  //fprintf(stderr,"nfsft_adjoint: nfft_adjoint\n");
1149  /* Use adjoint NFFT. */
1150  nfft_adjoint_2d(&plan->plan_nfft);
1151  }
1152 #ifdef MEASURE_TIME
1153  t1 = getticks();
1154  plan->MEASURE_TIME_t[2] = nfft_elapsed_seconds(t1,t0);
1155 #endif
1156 
1157  //fprintf(stderr,"nfsft_adjoint: Executing c2e_transposed\n");
1158  //fflush(stderr);
1159 #ifdef MEASURE_TIME
1160  t0 = getticks();
1161 #endif
1162  /* Convert Fourier coefficients to Chebyshev coefficients. */
1163  c2e_transposed(plan);
1164 #ifdef MEASURE_TIME
1165  t1 = getticks();
1166  plan->MEASURE_TIME_t[1] = nfft_elapsed_seconds(t1,t0);
1167 #endif
1168 
1169 #ifdef MEASURE_TIME
1170  t0 = getticks();
1171 #endif
1172  /* Check, which transposed polynomial transform algorithm should be used */
1173  if (plan->flags & NFSFT_USE_DPT)
1174  {
1175 #ifdef _OPENMP
1176  #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1177  for (n = -plan->N; n <= plan->N; n++)
1178  fpt_transposed_direct(wisdom.set_threads[omp_get_thread_num()],abs(n),
1179  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1180  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1181  plan->N,0U);
1182 #else
1183  /* Use transposed DPT. */
1184  for (n = -plan->N; n <= plan->N; n++)
1185  {
1186  //fprintf(stderr,"nfsft_adjoint: Executing dpt_transposed\n");
1187  //fflush(stderr);
1188  fpt_transposed_direct(wisdom.set,abs(n),
1189  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1190  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1191  plan->N,0U);
1192  }
1193 #endif
1194  }
1195  else
1196  {
1197 #ifdef _OPENMP
1198  #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1199  for (n = -plan->N; n <= plan->N; n++)
1200  fpt_transposed(wisdom.set_threads[omp_get_thread_num()],abs(n),
1201  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1202  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1203  plan->N,0U);
1204 #else
1205  //fprintf(stderr,"nfsft_adjoint: fpt_transposed\n");
1206  /* Use transposed FPT. */
1207  for (n = -plan->N; n <= plan->N; n++)
1208  {
1209  //fprintf(stderr,"nfsft_adjoint: Executing fpt_transposed\n");
1210  //fflush(stderr);
1211  fpt_transposed(wisdom.set,abs(n),
1212  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1213  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1214  plan->N,0U);
1215  }
1216 #endif
1217  }
1218 #ifdef MEASURE_TIME
1219  t1 = getticks();
1220  plan->MEASURE_TIME_t[0] = nfft_elapsed_seconds(t1,t0);
1221 #endif
1222 
1223  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
1224  * multiply spherical Fourier coefficients with corresponding normalization
1225  * weight. */
1226  if (plan->flags & NFSFT_NORMALIZED)
1227  {
1228  //fprintf(stderr,"nfsft_adjoint: Normalizing\n");
1229  //fflush(stderr);
1230  /* Traverse Fourier coefficients array. */
1231  #pragma omp parallel for default(shared) private(k,n)
1232  for (k = 0; k <= plan->N; k++)
1233  {
1234  for (n = -k; n <= k; n++)
1235  {
1236  /* Multiply with normalization weight. */
1237  plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
1238  sqrt((2*k+1)/(4.0*PI));
1239  }
1240  }
1241  }
1242 
1243  /* Set unused coefficients to zero. */
1244  if (plan->flags & NFSFT_ZERO_F_HAT)
1245  {
1246  //fprintf(stderr,"nfsft_adjoint: Setting to zero\n");
1247  //fflush(stderr);
1248  for (n = -plan->N; n <= plan->N+1; n++)
1249  {
1250  memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
1251  (plan->N+1+abs(n))*sizeof(double _Complex));
1252  }
1253  }
1254  //fprintf(stderr,"nfsft_adjoint: Finished\n");
1255  //fflush(stderr);
1256  }
1257 }
1258 
1259 void nfsft_precompute_x(nfsft_plan *plan)
1260 {
1261  /* Pass angle array to NFFT plan. */
1262  plan->plan_nfft.x = plan->x;
1263 
1264  /* Precompute. */
1265  if (plan->plan_nfft.nfft_flags & PRE_ONE_PSI)
1266  nfft_precompute_one_psi(&plan->plan_nfft);
1267 }
1268 /* \} */

Generated on Tue Apr 30 2013 by Doxygen 1.8.1