NFFT Logo 3.2.3
fpt.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id: fpt.c 3896 2012-10-10 12:19:26Z tovo $ */
20 
27 #include "config.h"
28 
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdbool.h>
32 #include <math.h>
33 #ifdef HAVE_COMPLEX_H
34 #include <complex.h>
35 #endif
36 
37 #include "nfft3.h"
38 #include "nfft3util.h"
39 #include "infft.h"
40 
41 /* Macros for index calculation. */
42 
44 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
45 
47 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
48 
50 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
51 
53 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
54 
55 #define N_TILDE(y) (y-1)
56 
57 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
58 
59 #define FPT_BREAK_EVEN 4
60 
64 typedef struct fpt_step_
65 {
66  bool stable;
69  int Ns;
70  int ts;
71  double **a11,**a12,**a21,**a22;
72  double *g;
73 } fpt_step;
74 
78 typedef struct fpt_data_
79 {
81  int k_start;
82  double *alphaN;
83  double *betaN;
84  double *gammaN;
85  double alpha_0;
86  double beta_0;
87  double gamma_m1;
88  /* Data for direct transform. */
89  double *_alpha;
90  double *_beta;
91  double *_gamma;
92 } fpt_data;
93 
97 typedef struct fpt_set_s_
98 {
99  int flags;
100  int M;
101  int N;
103  int t;
105  double **xcvecs;
108  double *xc;
109  double _Complex *temp;
110  double _Complex *work;
111  double _Complex *result;
112  double _Complex *vec3;
113  double _Complex *vec4;
114  double _Complex *z;
115  fftw_plan *plans_dct3;
117  fftw_plan *plans_dct2;
119  fftw_r2r_kind *kinds;
121  fftw_r2r_kind *kindsr;
124  int *lengths;
126  /* Data for slow transforms. */
127  double *xc_slow;
128 } fpt_set_s;
129 
130 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
131  double* v, double _Complex* y, double* w, int n)
132 {
133  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
134  double *v_ptr = v, *w_ptr = w;
135  for (l = 0; l < n; l++)
136  *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
137 }
138 
139 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
140 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
141  double* v, double _Complex* y, double* w, int n) \
142 { \
143  const int n2 = n>>1; \
144  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
145  double *v_ptr = v, *w_ptr = w; \
146  for (l = 0; l < n2; l++) \
147  *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
148  v_ptr--; w_ptr--; \
149  for (l = 0; l < n2; l++) \
150  *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
151 }
152 
153 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
154 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
155 
156 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
157 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
158  double* v, double _Complex* y, int n, double *xx) \
159 { \
160  const int n2 = n>>1; \
161  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
162  double *v_ptr = v, *xx_ptr = xx; \
163  for (l = 0; l < n2; l++, v_ptr++) \
164  *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
165  v_ptr--; \
166  for (l = 0; l < n2; l++, v_ptr--) \
167  *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
168 }
169 
170 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
171 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
172 
173 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
174 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
175  double _Complex* y, double* w, int n, double *xx) \
176 { \
177  const int n2 = n>>1; \
178  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
179  double *w_ptr = w, *xx_ptr = xx; \
180  for (l = 0; l < n2; l++, w_ptr++) \
181  *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
182  w_ptr--; \
183  for (l = 0; l < n2; l++, w_ptr--) \
184  *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
185 }
186 
187 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
188 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
189 
190 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
191  double _Complex* y, double* w, int n)
192 {
193  int l;
194  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
195  double *v_ptr = v, *w_ptr = w;
196  for (l = n; l > 0; l--)
197  *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
198 }
199 
200 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
201  double* v, double _Complex* y, double* w, int n)
202 {
203  const int n2 = n>>1; \
204  int l;
205  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
206  double *v_ptr = v, *w_ptr = w;
207  for (l = n2; l > 0; l--)
208  *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
209  v_ptr--; w_ptr--;
210  for (l = n2; l > 0; l--)
211  *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
212 }
213 
214 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
215  double* v, double _Complex* y, double* w, int n, double *xx)
216 {
217  const int n2 = n>>1; \
218  int l;
219  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
220  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
221  for (l = n2; l > 0; l--, xx_ptr++)
222  *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
223  v_ptr--; w_ptr--;
224  for (l = n2; l > 0; l--, xx_ptr++)
225  *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
226 }
227 
228 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
229  double* v, double _Complex* y, double* w, int n, double *xx)
230 {
231  const int n2 = n>>1; \
232  int l;
233  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
234  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
235  for (l = n2; l > 0; l--, xx_ptr++)
236  *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
237  v_ptr--; w_ptr--;
238  for (l = n2; l > 0; l--, xx_ptr++)
239  *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
240 }
241 
242 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
243 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
244  double *a21, double *a22, double g, int tau, fpt_set set) \
245 { \
246  \
247  int length = 1<<(tau+1); \
248  \
249  double norm = 1.0/(length<<1); \
250  \
251  /* Compensate for factors introduced by a raw DCT-III. */ \
252  a[0] *= 2.0; \
253  b[0] *= 2.0; \
254  \
255  /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
256  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
257  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
258  \
259  /*for (k = 0; k < length; k++)*/ \
260  /*{*/ \
261  /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \
262  /* a11[k],a12[k],a21[k],a22[k]);*/ \
263  /*}*/ \
264  \
265  /* Check, if gamma is zero. */ \
266  if (g == 0.0) \
267  { \
268  /*fprintf(stderr,"gamma == 0!\n");*/ \
269  /* Perform multiplication only for second row. */ \
270  M2_FUNCTION(norm,b,b,a22,a,a21,length); \
271  } \
272  else \
273  { \
274  /*fprintf(stderr,"gamma != 0!\n");*/ \
275  /* Perform multiplication for both rows. */ \
276  M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
277  M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
278  memcpy(b,set->z,length*sizeof(double _Complex)); \
279  /* Compute Chebyshev-coefficients using a DCT-II. */ \
280  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
281  /* Compensate for factors introduced by a raw DCT-II. */ \
282  a[0] *= 0.5; \
283  } \
284  \
285  /* Compute Chebyshev-coefficients using a DCT-II. */ \
286  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
287  /* Compensate for factors introduced by a raw DCT-II. */ \
288  b[0] *= 0.5; \
289 }
290 
291 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
292 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
293 /*FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
294 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)*/
295 
296 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
297  double *a11, double *a12, double *a21, double *a22, double *x,
298  double gam, int tau, fpt_set set)
299 {
301  int length = 1<<(tau+1);
303  double norm = 1.0/(length<<1);
304 
305  UNUSED(a21); UNUSED(a22);
306 
307  /* Compensate for factors introduced by a raw DCT-III. */
308  a[0] *= 2.0;
309  b[0] *= 2.0;
310 
311  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
312  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
313  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
314 
315  /*for (k = 0; k < length; k++)*/
316  /*{*/
317  /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
318  /* a11[k],a12[k],a21[k],a22[k]);*/
319  /*}*/
320 
321  /* Check, if gamma is zero. */
322  if (gam == 0.0)
323  {
324  /*fprintf(stderr,"gamma == 0!\n");*/
325  /* Perform multiplication only for second row. */
326  auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
327  }
328  else
329  {
330  /*fprintf(stderr,"gamma != 0!\n");*/
331  /* Perform multiplication for both rows. */
332  auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
333  auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
334  memcpy(b,set->z,length*sizeof(double _Complex));
335  /* Compute Chebyshev-coefficients using a DCT-II. */
336  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
337  /* Compensate for factors introduced by a raw DCT-II. */
338  a[0] *= 0.5;
339  }
340 
341  /* Compute Chebyshev-coefficients using a DCT-II. */
342  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
343  /* Compensate for factors introduced by a raw DCT-II. */
344  b[0] *= 0.5;
345 }
346 
347 static inline void fpt_do_step_symmetric_l(double _Complex *a, double _Complex *b,
348  double *a11, double *a12, double *a21, double *a22, double *x, double gam, int tau, fpt_set set)
349 {
351  int length = 1<<(tau+1);
353  double norm = 1.0/(length<<1);
354 
355  /* Compensate for factors introduced by a raw DCT-III. */
356  a[0] *= 2.0;
357  b[0] *= 2.0;
358 
359  UNUSED(a11); UNUSED(a12);
360 
361  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
362  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
363  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
364 
365  /*for (k = 0; k < length; k++)*/
366  /*{*/
367  /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
368  /* a11[k],a12[k],a21[k],a22[k]);*/
369  /*}*/
370 
371  /* Check, if gamma is zero. */
372  if (gam == 0.0)
373  {
374  /*fprintf(stderr,"gamma == 0!\n");*/
375  /* Perform multiplication only for second row. */
376  auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
377  }
378  else
379  {
380  /*fprintf(stderr,"gamma != 0!\n");*/
381  /* Perform multiplication for both rows. */
382  auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
383  auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
384  memcpy(b,set->z,length*sizeof(double _Complex));
385  /* Compute Chebyshev-coefficients using a DCT-II. */
386  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
387  /* Compensate for factors introduced by a raw DCT-II. */
388  a[0] *= 0.5;
389  }
390 
391  /* Compute Chebyshev-coefficients using a DCT-II. */
392  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
393  /* Compensate for factors introduced by a raw DCT-II. */
394  b[0] *= 0.5;
395 }
396 
397 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
398 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
399  double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
400 { \
401  \
402  int length = 1<<(tau+1); \
403  \
404  double norm = 1.0/(length<<1); \
405  \
406  /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
407  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
408  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
409  \
410  /* Perform matrix multiplication. */ \
411  M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
412  M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
413  memcpy(a,set->z,length*sizeof(double _Complex)); \
414  \
415  /* Compute Chebyshev-coefficients using a DCT-II. */ \
416  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
417  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
418 }
419 
420 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
421 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
422 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)*/
423 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)*/
424 
425 
426 static inline void fpt_do_step_t_symmetric_u(double _Complex *a,
427  double _Complex *b, double *a11, double *a12, double *x,
428  double gam, int tau, fpt_set set)
429 {
431  int length = 1<<(tau+1);
433  double norm = 1.0/(length<<1);
434 
435  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
436  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
437  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
438 
439  /* Perform matrix multiplication. */
440  abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
441  abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
442  memcpy(a,set->z,length*sizeof(double _Complex));
443 
444  /* Compute Chebyshev-coefficients using a DCT-II. */
445  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
446  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
447 }
448 
449 static inline void fpt_do_step_t_symmetric_l(double _Complex *a,
450  double _Complex *b, double *a21, double *a22,
451  double *x, double gam, int tau, fpt_set set)
452 {
454  int length = 1<<(tau+1);
456  double norm = 1.0/(length<<1);
457 
458  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
459  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
460  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
461 
462  /* Perform matrix multiplication. */
463  abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
464  abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
465  memcpy(a,set->z,length*sizeof(double _Complex));
466 
467  /* Compute Chebyshev-coefficients using a DCT-II. */
468  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
469  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
470 }
471 
472 
473 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
474  const double *beta, const double *gam)
475 {
476  /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
477  * of knots x[0], ..., x[size-1] by the Clenshaw algorithm
478  */
479  int i,j;
480  double a,b,x_val_act,a_old;
481  const double *x_act;
482  double *y_act;
483  const double *alpha_act, *beta_act, *gamma_act;
484 
485  /* Traverse all nodes. */
486  x_act = x;
487  y_act = y;
488  for (i = 0; i < size; i++)
489  {
490  a = 1.0;
491  b = 0.0;
492  x_val_act = *x_act;
493 
494  if (k == 0)
495  {
496  *y_act = 1.0;
497  }
498  else
499  {
500  alpha_act = &(alpha[k]);
501  beta_act = &(beta[k]);
502  gamma_act = &(gam[k]);
503  for (j = k; j > 1; j--)
504  {
505  a_old = a;
506  a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
507  b = a_old*(*gamma_act);
508  alpha_act--;
509  beta_act--;
510  gamma_act--;
511  }
512  *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
513  }
514  x_act++;
515  y_act++;
516  }
517 }
518 
519 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
520  const double *beta, const double *gam)
521 {
522  /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
523  * of knots x[0], ..., x[size-1] by the Clenshaw algorithm
524  */
525  int i,j;
526  double a,b,x_val_act,a_old;
527  const double *x_act;
528  double *y_act, *z_act;
529  const double *alpha_act, *beta_act, *gamma_act;
530 
531  /* Traverse all nodes. */
532  x_act = x;
533  y_act = y;
534  z_act = z;
535  for (i = 0; i < size; i++)
536  {
537  a = 1.0;
538  b = 0.0;
539  x_val_act = *x_act;
540 
541  if (k == 0)
542  {
543  *y_act = 1.0;
544  *z_act = 0.0;
545  }
546  else
547  {
548  alpha_act = &(alpha[k]);
549  beta_act = &(beta[k]);
550  gamma_act = &(gam[k]);
551  for (j = k; j > 1; j--)
552  {
553  a_old = a;
554  a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
555  b = a_old*(*gamma_act);
556  alpha_act--;
557  beta_act--;
558  gamma_act--;
559  }
560  if (i < size1)
561  *z_act = a;
562  *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
563  }
564 
565  x_act++;
566  y_act++;
567  z_act++;
568  }
569  /*gamma_act++;
570  FILE *f = fopen("/Users/keiner/Desktop/nfsft_debug.txt","a");
571  fprintf(f,"size1: %10d, size: %10d\n",size1,size);
572  fclose(f);*/
573 }
574 
575 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
576  const double *alpha, const double *beta, const double *gam, const
577  double threshold)
578 {
579  /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
580  * of knots x[0], ..., x[size-1] by the Clenshaw algorithm
581  */
582  int i,j;
583  double a,b,x_val_act,a_old;
584  const double *x_act;
585  double *y_act, *z_act;
586  const double *alpha_act, *beta_act, *gamma_act;
587  R max = -nfft_float_property(NFFT_R_MAX);
588  const R t = LOG10(FABS(threshold));
589 
590  /* Traverse all nodes. */
591  x_act = x;
592  y_act = y;
593  z_act = z;
594  for (i = 0; i < size; i++)
595  {
596  a = 1.0;
597  b = 0.0;
598  x_val_act = *x_act;
599 
600  if (k == 0)
601  {
602  *y_act = 1.0;
603  *z_act = 0.0;
604  }
605  else
606  {
607  alpha_act = &(alpha[k]);
608  beta_act = &(beta[k]);
609  gamma_act = &(gam[k]);
610  for (j = k; j > 1; j--)
611  {
612  a_old = a;
613  a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
614  b = a_old*(*gamma_act);
615  alpha_act--;
616  beta_act--;
617  gamma_act--;
618  }
619  *z_act = a;
620  *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
621  max = FMAX(max,LOG10(FABS(*y_act)));
622  if (max > t)
623  return 1;
624  }
625  x_act++;
626  y_act++;
627  z_act++;
628  }
629  return 0;
630 }
631 
632 static inline void eval_sum_clenshaw_fast(const int N, const int M,
633  const double _Complex *a, const double *x, double _Complex *y,
634  const double *alpha, const double *beta, const double *gam,
635  const double lambda)
636 {
637  int j,k;
638  double _Complex tmp1, tmp2, tmp3;
639  double xc;
640 
641  /*fprintf(stderr, "Executing eval_sum_clenshaw_fast.\n");
642  fprintf(stderr, "Before transform:\n");
643  for (j = 0; j < N; j++)
644  fprintf(stderr, "a[%4d] = %e.\n", j, a[j]);
645  for (j = 0; j <= M; j++)
646  fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]);*/
647 
648  if (N == 0)
649  for (j = 0; j <= M; j++)
650  y[j] = a[0];
651  else
652  {
653  for (j = 0; j <= M; j++)
654  {
655 #if 0
656  xc = x[j];
657  tmp2 = a[N];
658  tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
659  for (k = N-1; k > 0; k--)
660  {
661  tmp3 = a[k-1]
662  + (alpha[k-1] * xc + beta[k-1]) * tmp1
663  + gam[k] * tmp2;
664  tmp2 = tmp1;
665  tmp1 = tmp3;
666  }
667  y[j] = lambda * tmp1;
668 #else
669  xc = x[j];
670  tmp1 = a[N-1];
671  tmp2 = a[N];
672  for (k = N-1; k > 0; k--)
673  {
674  tmp3 = a[k-1] + tmp2 * gam[k];
675  tmp2 *= (alpha[k] * xc + beta[k]);
676  tmp2 += tmp1;
677  tmp1 = tmp3;
678  /*if (j == 1515)
679  {
680  fprintf(stderr, "k = %d, tmp1 = %e, tmp2 = %e.\n", k, tmp1, tmp2);
681  }*/
682  }
683  tmp2 *= (alpha[0] * xc + beta[0]);
684  //fprintf(stderr, "alpha[0] = %e, beta[0] = %e.\n", alpha[0], beta[0]);
685  y[j] = lambda * (tmp2 + tmp1);
686  //fprintf(stderr, "lambda = %e.\n", lambda);
687 #endif
688  }
689  }
690  /*fprintf(stderr, "Before transform:\n");
691  for (j = 0; j < N; j++)
692  fprintf(stderr, "a[%4d] = %e.\n", j, a[j]);
693  for (j = 0; j <= M; j++)
694  fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]); */
695 }
696 
715 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
716  double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam,
717  double lambda)
718 {
719  int j,k;
720  double _Complex* it1 = temp;
721  double _Complex* it2 = y;
722  double _Complex aux;
723 
724  /* Compute final result by multiplying with the constant lambda */
725  a[0] = 0.0;
726  for (j = 0; j <= M; j++)
727  {
728  it2[j] = lambda * y[j];
729  a[0] += it2[j];
730  }
731 
732  if (N > 0)
733  {
734  /* Compute final step. */
735  a[1] = 0.0;
736  for (j = 0; j <= M; j++)
737  {
738  it1[j] = it2[j];
739  it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
740  a[1] += it2[j];
741  }
742 
743  for (k = 2; k <= N; k++)
744  {
745  a[k] = 0.0;
746  for (j = 0; j <= M; j++)
747  {
748  aux = it1[j];
749  it1[j] = it2[j];
750  it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
751  a[k] += it2[j];
752  }
753  }
754  }
755 }
756 
757 
758 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
759 {
761  int plength;
763  int tau;
765  int m;
766  int k;
767 #ifdef _OPENMP
768  int nthreads = nfft_get_omp_num_threads();
769 #endif
770 
771  /* Allocate memory for new DPT set. */
772  fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
773 
774  /* Save parameters in structure. */
775  set->flags = flags;
776 
777  set->M = M;
778  set->t = t;
779  set->N = 1<<t;
780 
781  /* Allocate memory for M transforms. */
782  set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
783 
784  /* Initialize with NULL pointer. */
785  for (m = 0; m < set->M; m++)
786  set->dpt[m].steps = 0;
787 
788  /* Create arrays with Chebyshev nodes. */
789 
790  /* Initialize array with Chebyshev coefficients for the polynomial x. This
791  * would be trivially an array containing a 1 as second entry with all other
792  * coefficients set to zero. In order to compensate for the multiplicative
793  * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */
794 
795  /* Allocate memory for array of pointers to node arrays. */
796  set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
797  /* For each polynomial length starting with 4, compute the Chebyshev nodes
798  * using a DCT-III. */
799  plength = 4;
800  for (tau = 1; tau < t+1; tau++)
801  {
802  /* Allocate memory for current array. */
803  set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
804  for (k = 0; k < plength; k++)
805  {
806  set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
807  }
808  plength = plength << 1;
809  }
810 
812  set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
813  set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
814 
815  set->lengths = (int*) nfft_malloc((set->t/*-1*/)*sizeof(int));
816  set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
817  set->kindsr = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
818  set->kindsr[0] = FFTW_REDFT10;
819  set->kindsr[1] = FFTW_REDFT10;
820  for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
821  {
822  set->lengths[tau] = plength;
823 #ifdef _OPENMP
824 #pragma omp critical (nfft_omp_critical_fftw_plan)
825 {
826  fftw_plan_with_nthreads(nthreads);
827 #endif
828  set->plans_dct2[tau] =
829  fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
830  2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
831  0);
832 #ifdef _OPENMP
833 }
834 #endif
835  }
836 
837  /* Check if fast transform is activated. */
838  if (!(set->flags & FPT_NO_FAST_ALGORITHM))
839  {
841  set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
842  set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
843  set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
844 
846  set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
847  set->kinds = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
848  set->kinds[0] = FFTW_REDFT01;
849  set->kinds[1] = FFTW_REDFT01;
850  for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
851  {
852  set->lengths[tau] = plength;
853 #ifdef _OPENMP
854 #pragma omp critical (nfft_omp_critical_fftw_plan)
855 {
856  fftw_plan_with_nthreads(nthreads);
857 #endif
858  set->plans_dct3[tau] =
859  fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
860  2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
861  0);
862 #ifdef _OPENMP
863 }
864 #endif
865  }
866  nfft_free(set->lengths);
867  nfft_free(set->kinds);
868  nfft_free(set->kindsr);
869  set->lengths = NULL;
870  set->kinds = NULL;
871  set->kindsr = NULL;
872  }
873 
874  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
875  {
876  set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
877  set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
878  }
879 
880  /* Return the newly created DPT set. */
881  return set;
882 }
883 
884 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
885  double *gam, int k_start, const double threshold)
886 {
887 
888  int tau;
889  int l;
890  int plength;
892  int degree;
894  int firstl;
895  int lastl;
896  int plength_stab;
898  int degree_stab;
900  double *a11;
902  double *a12;
904  double *a21;
906  double *a22;
908  const double *calpha;
909  const double *cbeta;
910  const double *cgamma;
911  int needstab = 0;
912  int k_start_tilde;
913  int N_tilde;
914  int clength;
915  int clength_1;
916  int clength_2;
917  int t_stab, N_stab;
918  fpt_data *data;
919 
920  /* Get pointer to DPT data. */
921  data = &(set->dpt[m]);
922 
923  /* Check, if already precomputed. */
924  if (data->steps != NULL)
925  return;
926 
927  /* Save k_start. */
928  data->k_start = k_start;
929 
930  data->gamma_m1 = gam[0];
931 
932  /* Check if fast transform is activated. */
933  if (!(set->flags & FPT_NO_FAST_ALGORITHM))
934  {
935  /* Save recursion coefficients. */
936  data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
937  data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
938  data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
939 
940  for (tau = 2; tau <= set->t; tau++)
941  {
942 
943  data->alphaN[tau-2] = alpha[1<<tau];
944  data->betaN[tau-2] = beta[1<<tau];
945  data->gammaN[tau-2] = gam[1<<tau];
946  }
947 
948  data->alpha_0 = alpha[1];
949  data->beta_0 = beta[1];
950 
951  k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
952  /*set->N*/);
953  N_tilde = N_TILDE(set->N);
954 
955  /* Allocate memory for the cascade with t = log_2(N) many levels. */
956  data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
957 
958  /* For tau = 1,...t compute the matrices U_{n,tau,l}. */
959  plength = 4;
960  for (tau = 1; tau < set->t; tau++)
961  {
962  /* Compute auxilliary values. */
963  degree = plength>>1;
964  /* Compute first l. */
965  firstl = FIRST_L(k_start_tilde,plength);
966  /* Compute last l. */
967  lastl = LAST_L(N_tilde,plength);
968 
969  /* Allocate memory for current level. This level will contain 2^{t-tau-1}
970  * many matrices. */
971  data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
972  * (lastl+1));
973 
974  /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
975  for (l = firstl; l <= lastl; l++)
976  {
977  if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
978  {
979  //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m);
980  //fflush(stderr);
981  clength = plength/2;
982  }
983  else
984  {
985  clength = plength;
986  }
987 
988  /* Allocate memory for the components of U_{n,tau,l}. */
989  a11 = (double*) nfft_malloc(sizeof(double)*clength);
990  a12 = (double*) nfft_malloc(sizeof(double)*clength);
991  a21 = (double*) nfft_malloc(sizeof(double)*clength);
992  a22 = (double*) nfft_malloc(sizeof(double)*clength);
993 
994  /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev
995  * nodes. */
996 
997  /* Get the pointers to the three-term recurrence coeffcients. */
998  calpha = &(alpha[plength*l+1+1]);
999  cbeta = &(beta[plength*l+1+1]);
1000  cgamma = &(gam[plength*l+1+1]);
1001 
1002  if (set->flags & FPT_NO_STABILIZATION)
1003  {
1004  /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */
1005  calpha--;
1006  cbeta--;
1007  cgamma--;
1008  eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
1009  cgamma);
1010  eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
1011  cgamma);
1012  needstab = 0;
1013  }
1014  else
1015  {
1016  calpha--;
1017  cbeta--;
1018  cgamma--;
1019  /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */
1020  needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
1021  degree-1, calpha, cbeta, cgamma, threshold);
1022  if (needstab == 0)
1023  {
1024  /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */
1025  needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
1026  degree, calpha, cbeta, cgamma, threshold);
1027  }
1028  }
1029 
1030 
1031  /* Check if stabilization needed. */
1032  if (needstab == 0)
1033  {
1034  data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
1035  data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
1036  data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
1037  data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
1038  data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
1039  /* No stabilization needed. */
1040  data->steps[tau][l].a11[0] = a11;
1041  data->steps[tau][l].a12[0] = a12;
1042  data->steps[tau][l].a21[0] = a21;
1043  data->steps[tau][l].a22[0] = a22;
1044  data->steps[tau][l].g[0] = gam[plength*l+1+1];
1045  data->steps[tau][l].stable = true;
1046  }
1047  else
1048  {
1049  /* Stabilize. */
1050  degree_stab = degree*(2*l+1);
1051  X(next_power_of_2_exp)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
1052 
1053  /* Old arrays are to small. */
1054  nfft_free(a11);
1055  nfft_free(a12);
1056  nfft_free(a21);
1057  nfft_free(a22);
1058 
1059  data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
1060  data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
1061  data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
1062  data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
1063  data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
1064 
1065  plength_stab = N_stab;
1066 
1067  if (set->flags & FPT_AL_SYMMETRY)
1068  {
1069  if (m <= 1)
1070  {
1071  /* This should never be executed */
1072  clength_1 = plength_stab;
1073  clength_2 = plength_stab;
1074  /* Allocate memory for arrays. */
1075  a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
1076  a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
1077  a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
1078  a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
1079  /* Get the pointers to the three-term recurrence coeffcients. */
1080  calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1081  eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
1082  clength_2, degree_stab-1, calpha, cbeta, cgamma);
1083  eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
1084  clength_2, degree_stab+0, calpha, cbeta, cgamma);
1085  }
1086  else
1087  {
1088  clength = plength_stab/2;
1089  if (m%2 == 0)
1090  {
1091  a11 = (double*) nfft_malloc(sizeof(double)*clength);
1092  a12 = (double*) nfft_malloc(sizeof(double)*clength);
1093  a21 = 0;
1094  a22 = 0;
1095  calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
1096  eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
1097  degree_stab-2, calpha, cbeta, cgamma);
1098  eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
1099  degree_stab-1, calpha, cbeta, cgamma);
1100  }
1101  else
1102  {
1103  a11 = 0;
1104  a12 = 0;
1105  a21 = (double*) nfft_malloc(sizeof(double)*clength);
1106  a22 = (double*) nfft_malloc(sizeof(double)*clength);
1107  calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1108  eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
1109  degree_stab-1,calpha, cbeta, cgamma);
1110  eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
1111  degree_stab+0, calpha, cbeta, cgamma);
1112  }
1113  }
1114  }
1115  else
1116  {
1117  clength_1 = plength_stab;
1118  clength_2 = plength_stab;
1119  a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
1120  a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
1121  a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
1122  a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
1123  calpha = &(alpha[2]);
1124  cbeta = &(beta[2]);
1125  cgamma = &(gam[2]);
1126  calpha--;
1127  cbeta--;
1128  cgamma--;
1129  eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
1130  calpha, cbeta, cgamma);
1131  eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
1132  calpha, cbeta, cgamma);
1133 
1134  }
1135  data->steps[tau][l].a11[0] = a11;
1136  data->steps[tau][l].a12[0] = a12;
1137  data->steps[tau][l].a21[0] = a21;
1138  data->steps[tau][l].a22[0] = a22;
1139 
1140  data->steps[tau][l].g[0] = gam[1+1];
1141  data->steps[tau][l].stable = false;
1142  data->steps[tau][l].ts = t_stab;
1143  data->steps[tau][l].Ns = N_stab;
1144  }
1145  }
1147  plength = plength << 1;
1148  }
1149  }
1150 
1151  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1152  {
1153  /* Check, if recurrence coefficients must be copied. */
1154  if (set->flags & FPT_PERSISTENT_DATA)
1155  {
1156  data->_alpha = (double*) alpha;
1157  data->_beta = (double*) beta;
1158  data->_gamma = (double*) gam;
1159  }
1160  else
1161  {
1162  data->_alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
1163  data->_beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
1164  data->_gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
1165  memcpy(data->_alpha,alpha,(set->N+1)*sizeof(double));
1166  memcpy(data->_beta,beta,(set->N+1)*sizeof(double));
1167  memcpy(data->_gamma,gam,(set->N+1)*sizeof(double));
1168  }
1169  }
1170 }
1171 
1172 void fpt_trafo_direct(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
1173  const int k_end, const unsigned int flags)
1174 {
1175  int j;
1176  fpt_data *data = &(set->dpt[m]);
1177  int Nk;
1178  int tk;
1179  double norm;
1180 
1181  //fprintf(stderr, "Executing dpt.\n");
1182 
1183  X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
1184  norm = 2.0/(Nk<<1);
1185 
1186  //fprintf(stderr, "Norm = %e.\n", norm);
1187 
1188  if (set->flags & FPT_NO_DIRECT_ALGORITHM)
1189  {
1190  return;
1191  }
1192 
1193  if (flags & FPT_FUNCTION_VALUES)
1194  {
1195  /* Fill array with Chebyshev nodes. */
1196  for (j = 0; j <= k_end; j++)
1197  {
1198  set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
1199  //fprintf(stderr, "x[%4d] = %e.\n", j, set->xc_slow[j]);
1200  }
1201 
1202  memset(set->result,0U,data->k_start*sizeof(double _Complex));
1203  memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
1204 
1205  /*eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
1206  y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
1207  data->gamma_m1);*/
1208  eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
1209  y, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], data->gamma_m1);
1210  }
1211  else
1212  {
1213  memset(set->temp,0U,data->k_start*sizeof(double _Complex));
1214  memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
1215 
1216  eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
1217  set->result, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
1218  data->gamma_m1);
1219 
1220  fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
1221  (double*)set->result);
1222 
1223  set->result[0] *= 0.5;
1224  for (j = 0; j < Nk; j++)
1225  {
1226  set->result[j] *= norm;
1227  }
1228 
1229  memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
1230  }
1231 }
1232 
1233 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
1234  const int k_end, const unsigned int flags)
1235 {
1236  /* Get transformation data. */
1237  fpt_data *data = &(set->dpt[m]);
1239  int Nk;
1241  int tk;
1243  int k_start_tilde;
1245  int k_end_tilde;
1246 
1248  int tau;
1250  int firstl;
1252  int lastl;
1254  int l;
1256  int plength;
1258  int plength_stab;
1259  int t_stab;
1261  fpt_step *step;
1263  fftw_plan plan = 0;
1264  int length = k_end+1;
1265  fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
1266 
1268  int k;
1269 
1270  double _Complex *work_ptr;
1271  const double _Complex *x_ptr;
1272 
1273  /* Check, if slow transformation should be used due to small bandwidth. */
1274  if (k_end < FPT_BREAK_EVEN)
1275  {
1276  /* Use NDSFT. */
1277  fpt_trafo_direct(set, m, x, y, k_end, flags);
1278  return;
1279  }
1280 
1281  X(next_power_of_2_exp)(k_end,&Nk,&tk);
1282  k_start_tilde = K_START_TILDE(data->k_start,Nk);
1283  k_end_tilde = K_END_TILDE(k_end,Nk);
1284 
1285  /* Check if fast transform is activated. */
1286  if (set->flags & FPT_NO_FAST_ALGORITHM)
1287  return;
1288 
1289  if (flags & FPT_FUNCTION_VALUES)
1290  {
1291 #ifdef _OPENMP
1292  int nthreads = nfft_get_omp_num_threads();
1293 #pragma omp critical (nfft_omp_critical_fftw_plan)
1294 {
1295  fftw_plan_with_nthreads(nthreads);
1296 #endif
1297  plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
1298  (double*)set->work, NULL, 2, 1, kinds, 0U);
1299 #ifdef _OPENMP
1300 }
1301 #endif
1302  }
1303 
1304  /* Initialize working arrays. */
1305  memset(set->result,0U,2*Nk*sizeof(double _Complex));
1306 
1307  /* The first step. */
1308 
1309  /* Set the first 2*data->k_start coefficients to zero. */
1310  memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
1311 
1312  work_ptr = &set->work[2*data->k_start];
1313  x_ptr = x;
1314 
1315  for (k = 0; k <= k_end_tilde-data->k_start; k++)
1316  {
1317  *work_ptr++ = *x_ptr++;
1318  *work_ptr++ = K(0.0);
1319  }
1320 
1321  /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */
1322  memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
1323 
1324  /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to
1325  * x_{Nk-1} and x_{Nk-2}. */
1326  if (k_end == Nk)
1327  {
1328  set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
1329  set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
1330  set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
1331  }
1332 
1333  /* Compute the remaining steps. */
1334  plength = 4;
1335  for (tau = 1; tau < tk; tau++)
1336  {
1337  /* Compute first l. */
1338  firstl = FIRST_L(k_start_tilde,plength);
1339  /* Compute last l. */
1340  lastl = LAST_L(k_end_tilde,plength);
1341 
1342  /* Compute the multiplication steps. */
1343  for (l = firstl; l <= lastl; l++)
1344  {
1345  /* Copy vectors to multiply into working arrays zero-padded to twice the length. */
1346  memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
1347  memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
1348  memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
1349  memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
1350 
1351  /* Copy coefficients into first half. */
1352  memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
1353  memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
1354  memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
1355 
1356  /* Get matrix U_{n,tau,l} */
1357  step = &(data->steps[tau][l]);
1358 
1359  /* Check if step is stable. */
1360  if (step->stable)
1361  {
1362  /* Check, if we should do a symmetrizised step. */
1363  if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
1364  {
1365  /*for (k = 0; k < plength; k++)
1366  {
1367  fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",
1368  step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]);
1369  }*/
1370  /* Multiply third and fourth polynomial with matrix U. */
1371  //fprintf(stderr,"\nhallo\n");
1372  fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
1373  step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
1374  }
1375  else
1376  {
1377  /* Multiply third and fourth polynomial with matrix U. */
1378  fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1379  step->a21[0], step->a22[0], step->g[0], tau, set);
1380  }
1381 
1382  if (step->g[0] != 0.0)
1383  {
1384  for (k = 0; k < plength; k++)
1385  {
1386  set->work[plength*2*l+k] += set->vec3[k];
1387  }
1388  }
1389  for (k = 0; k < plength; k++)
1390  {
1391  set->work[plength*(2*l+1)+k] += set->vec4[k];
1392  }
1393  }
1394  else
1395  {
1396  /* Stabilize. */
1397 
1398  /* The lengh of the polynomials */
1399  plength_stab = step->Ns;
1400  t_stab = step->ts;
1401 
1402  /*---------*/
1403  /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l);
1404  fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab);
1405  fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk);
1406  fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/
1407  /*---------*/
1408 
1409  /* Set rest of vectors explicitely to zero */
1410  /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n",
1411  plength, plength_stab);*/
1412  memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
1413  memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
1414 
1415  /* Multiply third and fourth polynomial with matrix U. */
1416  /* Check for symmetry. */
1417  if (set->flags & FPT_AL_SYMMETRY)
1418  {
1419  if (m <= 1)
1420  {
1421  fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1422  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1423  }
1424  else if (m%2 == 0)
1425  {
1426  /*fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1427  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1428  fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1429  step->a21[0], step->a22[0],
1430  set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1431  /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1432  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1433  }
1434  else
1435  {
1436  /*fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
1437  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1438  fpt_do_step_symmetric_l(set->vec3, set->vec4,
1439  step->a11[0], step->a12[0],
1440  step->a21[0],
1441  step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1442  /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1443  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1444  }
1445  }
1446  else
1447  {
1448  fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1449  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1450  }
1451 
1452  if (step->g[0] != 0.0)
1453  {
1454  for (k = 0; k < plength_stab; k++)
1455  {
1456  set->result[k] += set->vec3[k];
1457  }
1458  }
1459 
1460  for (k = 0; k < plength_stab; k++)
1461  {
1462  set->result[Nk+k] += set->vec4[k];
1463  }
1464  }
1465  }
1466  /* Double length of polynomials. */
1467  plength = plength<<1;
1468 
1469  /*--------*/
1470  /*for (k = 0; k < 2*Nk; k++)
1471  {
1472  fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
1473  k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
1474  cimag(set->result[k]));
1475  }*/
1476  /*--------*/
1477  }
1478 
1479  /* Add the resulting cascade coeffcients to the coeffcients accumulated from
1480  * the stabilization steps. */
1481  for (k = 0; k < 2*Nk; k++)
1482  {
1483  set->result[k] += set->work[k];
1484  }
1485 
1486  /* The last step. Compute the Chebyshev coeffcients c_k^n from the
1487  * polynomials in front of P_0^n and P_1^n. */
1488  y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
1489  data->alpha_0*set->result[Nk+1]*0.5);
1490  y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
1491  data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
1492  y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
1493  data->beta_0*set->result[Nk+k_end-1] +
1494  data->alpha_0*set->result[Nk+k_end-2]*0.5);
1495  y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
1496  for (k = 2; k <= k_end-2; k++)
1497  {
1498  y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
1499  data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
1500  }
1501 
1502  if (flags & FPT_FUNCTION_VALUES)
1503  {
1504  y[0] *= 2.0;
1505  fftw_execute_r2r(plan,(double*)y,(double*)y);
1506 #pragma omp critical (nfft_omp_critical_fftw_plan)
1507  fftw_destroy_plan(plan);
1508  for (k = 0; k <= k_end; k++)
1509  {
1510  y[k] *= 0.5;
1511  }
1512  }
1513 }
1514 
1515 void fpt_transposed_direct(fpt_set set, const int m, double _Complex *x,
1516  double _Complex *y, const int k_end, const unsigned int flags)
1517 {
1518  int j;
1519  fpt_data *data = &(set->dpt[m]);
1520  int Nk;
1521  int tk;
1522  double norm;
1523 
1524  X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
1525  norm = 2.0/(Nk<<1);
1526 
1527  if (set->flags & FPT_NO_DIRECT_ALGORITHM)
1528  {
1529  return;
1530  }
1531 
1532  if (flags & FPT_FUNCTION_VALUES)
1533  {
1534  for (j = 0; j <= k_end; j++)
1535  {
1536  set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
1537  }
1538 
1539  eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
1540  y, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
1541  data->gamma_m1);
1542 
1543  memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
1544  sizeof(double _Complex));
1545  }
1546  else
1547  {
1548  memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
1549  memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
1550 
1551  for (j = 0; j < Nk; j++)
1552  {
1553  set->result[j] *= norm;
1554  }
1555 
1556  fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
1557  (double*)set->result);
1558 
1559  eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
1560  set->result, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
1561  data->gamma_m1);
1562 
1563  memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
1564  }
1565 }
1566 
1567 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
1568  double _Complex *y, const int k_end, const unsigned int flags)
1569 {
1570  /* Get transformation data. */
1571  fpt_data *data = &(set->dpt[m]);
1573  int Nk;
1575  int tk;
1577  int k_start_tilde;
1579  int k_end_tilde;
1580 
1582  int tau;
1584  int firstl;
1586  int lastl;
1588  int l;
1590  int plength;
1592  int plength_stab;
1594  fpt_step *step;
1596  fftw_plan plan;
1597  int length = k_end+1;
1598  fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
1600  int k;
1601  int t_stab;
1602 
1603  /* Check, if slow transformation should be used due to small bandwidth. */
1604  if (k_end < FPT_BREAK_EVEN)
1605  {
1606  /* Use NDSFT. */
1607  fpt_transposed_direct(set, m, x, y, k_end, flags);
1608  return;
1609  }
1610 
1611  X(next_power_of_2_exp)(k_end,&Nk,&tk);
1612  k_start_tilde = K_START_TILDE(data->k_start,Nk);
1613  k_end_tilde = K_END_TILDE(k_end,Nk);
1614 
1615  /* Check if fast transform is activated. */
1616  if (set->flags & FPT_NO_FAST_ALGORITHM)
1617  {
1618  return;
1619  }
1620 
1621  if (flags & FPT_FUNCTION_VALUES)
1622  {
1623 #ifdef _OPENMP
1624  int nthreads = nfft_get_omp_num_threads();
1625 #pragma omp critical (nfft_omp_critical_fftw_plan)
1626 {
1627  fftw_plan_with_nthreads(nthreads);
1628 #endif
1629  plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
1630  (double*)set->work, NULL, 2, 1, kinds, 0U);
1631 #ifdef _OPENMP
1632 }
1633 #endif
1634  fftw_execute_r2r(plan,(double*)y,(double*)set->result);
1635 #pragma omp critical (nfft_omp_critical_fftw_plan)
1636  fftw_destroy_plan(plan);
1637  for (k = 0; k <= k_end; k++)
1638  {
1639  set->result[k] *= 0.5;
1640  }
1641  }
1642  else
1643  {
1644  memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
1645  }
1646 
1647  /* Initialize working arrays. */
1648  memset(set->work,0U,2*Nk*sizeof(double _Complex));
1649 
1650  /* The last step is now the first step. */
1651  for (k = 0; k <= k_end; k++)
1652  {
1653  set->work[k] = data->gamma_m1*set->result[k];
1654  }
1655  //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double _Complex));
1656 
1657  set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
1658  data->alpha_0*set->result[1]);
1659  for (k = 1; k < k_end; k++)
1660  {
1661  set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
1662  data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
1663  }
1664  if (k_end<Nk)
1665  {
1666  memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
1667  }
1668 
1670  memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
1671 
1672  /* Compute the remaining steps. */
1673  plength = Nk;
1674  for (tau = tk-1; tau >= 1; tau--)
1675  {
1676  /* Compute first l. */
1677  firstl = FIRST_L(k_start_tilde,plength);
1678  /* Compute last l. */
1679  lastl = LAST_L(k_end_tilde,plength);
1680 
1681  /* Compute the multiplication steps. */
1682  for (l = firstl; l <= lastl; l++)
1683  {
1684  /* Initialize second half of coefficient arrays with zeros. */
1685  memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
1686  memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
1687 
1688  memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
1689  (plength/2)*sizeof(double _Complex));
1690 
1691  /* Get matrix U_{n,tau,l} */
1692  step = &(data->steps[tau][l]);
1693 
1694  /* Check if step is stable. */
1695  if (step->stable)
1696  {
1697  if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
1698  {
1699  /* Multiply third and fourth polynomial with matrix U. */
1700  fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1701  step->a21[0], step->a22[0], step->g[0], tau, set);
1702  }
1703  else
1704  {
1705  /* Multiply third and fourth polynomial with matrix U. */
1706  fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1707  step->a21[0], step->a22[0], step->g[0], tau, set);
1708  }
1709  memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
1710 
1711  for (k = 0; k < plength; k++)
1712  {
1713  set->work[plength*(4*l+2)/2+k] = set->vec3[k];
1714  }
1715  }
1716  else
1717  {
1718  /* Stabilize. */
1719  plength_stab = step->Ns;
1720  t_stab = step->ts;
1721 
1722  memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
1723  memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
1724 
1725  /* Multiply third and fourth polynomial with matrix U. */
1726  if (set->flags & FPT_AL_SYMMETRY)
1727  {
1728  if (m <= 1)
1729  {
1730  fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1731  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1732  }
1733  else if (m%2 == 0)
1734  {
1735  fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1736  set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1737  }
1738  else
1739  {
1740  fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
1741  step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1742  }
1743  }
1744  else
1745  {
1746  fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1747  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1748  }
1749 
1750  memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
1751 
1752  for (k = 0; k < plength; k++)
1753  {
1754  set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
1755  }
1756  }
1757  }
1758  /* Half the length of polynomial arrays. */
1759  plength = plength>>1;
1760  }
1761 
1762  /* First step */
1763  for (k = 0; k <= k_end_tilde-data->k_start; k++)
1764  {
1765  x[k] = set->work[2*(data->k_start+k)];
1766  }
1767  if (k_end == Nk)
1768  {
1769  x[Nk-data->k_start] =
1770  data->gammaN[tk-2]*set->work[2*(Nk-2)]
1771  + data->betaN[tk-2] *set->work[2*(Nk-1)]
1772  + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
1773  }
1774 }
1775 
1776 void fpt_finalize(fpt_set set)
1777 {
1778  int tau;
1779  int l;
1780  int m;
1781  fpt_data *data;
1782  int k_start_tilde;
1783  int N_tilde;
1784  int firstl, lastl;
1785  int plength;
1786  const int M = set->M;
1787 
1788  /* TODO Clean up DPT transform data structures. */
1789  for (m = 0; m < M; m++)
1790  {
1791  /* Check if precomputed. */
1792  data = &set->dpt[m];
1793  if (data->steps != (fpt_step**)NULL)
1794  {
1795  nfft_free(data->alphaN);
1796  nfft_free(data->betaN);
1797  nfft_free(data->gammaN);
1798  data->alphaN = NULL;
1799  data->betaN = NULL;
1800  data->gammaN = NULL;
1801 
1802  /* Free precomputed data. */
1803  k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
1804  /*set->N*/);
1805  N_tilde = N_TILDE(set->N);
1806  plength = 4;
1807  for (tau = 1; tau < set->t; tau++)
1808  {
1809  /* Compute first l. */
1810  firstl = FIRST_L(k_start_tilde,plength);
1811  /* Compute last l. */
1812  lastl = LAST_L(N_tilde,plength);
1813 
1814  /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
1815  for (l = firstl; l <= lastl; l++)
1816  {
1817  /* Free components. */
1818  nfft_free(data->steps[tau][l].a11[0]);
1819  nfft_free(data->steps[tau][l].a12[0]);
1820  nfft_free(data->steps[tau][l].a21[0]);
1821  nfft_free(data->steps[tau][l].a22[0]);
1822  data->steps[tau][l].a11[0] = NULL;
1823  data->steps[tau][l].a12[0] = NULL;
1824  data->steps[tau][l].a21[0] = NULL;
1825  data->steps[tau][l].a22[0] = NULL;
1826  /* Free components. */
1827  nfft_free(data->steps[tau][l].a11);
1828  nfft_free(data->steps[tau][l].a12);
1829  nfft_free(data->steps[tau][l].a21);
1830  nfft_free(data->steps[tau][l].a22);
1831  nfft_free(data->steps[tau][l].g);
1832  data->steps[tau][l].a11 = NULL;
1833  data->steps[tau][l].a12 = NULL;
1834  data->steps[tau][l].a21 = NULL;
1835  data->steps[tau][l].a22 = NULL;
1836  data->steps[tau][l].g = NULL;
1837  }
1838  /* Free pointers for current level. */
1839  nfft_free(data->steps[tau]);
1840  data->steps[tau] = NULL;
1841  /* Double length of polynomials. */
1842  plength = plength<<1;
1843  }
1844  /* Free steps. */
1845  nfft_free(data->steps);
1846  data->steps = NULL;
1847  }
1848 
1849  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1850  {
1851  /* Check, if recurrence coefficients must be copied. */
1852  //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA);
1853  if (!(set->flags & FPT_PERSISTENT_DATA))
1854  {
1855  nfft_free(data->_alpha);
1856  nfft_free(data->_beta);
1857  nfft_free(data->_gamma);
1858  }
1859  data->_alpha = NULL;
1860  data->_beta = NULL;
1861  data->_gamma = NULL;
1862  }
1863  }
1864 
1865  /* Delete array of DPT transform data. */
1866  nfft_free(set->dpt);
1867  set->dpt = NULL;
1868 
1869  for (tau = 1; tau < set->t+1; tau++)
1870  {
1871  nfft_free(set->xcvecs[tau-1]);
1872  set->xcvecs[tau-1] = NULL;
1873  }
1874  nfft_free(set->xcvecs);
1875  set->xcvecs = NULL;
1876 
1877  /* Free auxilliary arrays. */
1878  nfft_free(set->work);
1879  nfft_free(set->result);
1880 
1881  /* Check if fast transform is activated. */
1882  if (!(set->flags & FPT_NO_FAST_ALGORITHM))
1883  {
1884  /* Free auxilliary arrays. */
1885  nfft_free(set->vec3);
1886  nfft_free(set->vec4);
1887  nfft_free(set->z);
1888  set->work = NULL;
1889  set->result = NULL;
1890  set->vec3 = NULL;
1891  set->vec4 = NULL;
1892  set->z = NULL;
1893 
1894  /* Free FFTW plans. */
1895  for(tau = 0; tau < set->t/*-1*/; tau++)
1896  {
1897 #pragma omp critical (nfft_omp_critical_fftw_plan)
1898 {
1899  fftw_destroy_plan(set->plans_dct3[tau]);
1900  fftw_destroy_plan(set->plans_dct2[tau]);
1901 }
1902  set->plans_dct3[tau] = NULL;
1903  set->plans_dct2[tau] = NULL;
1904  }
1905 
1906  nfft_free(set->plans_dct3);
1907  nfft_free(set->plans_dct2);
1908  set->plans_dct3 = NULL;
1909  set->plans_dct2 = NULL;
1910  }
1911 
1912  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1913  {
1914  /* Delete arrays of Chebyshev nodes. */
1915  nfft_free(set->xc_slow);
1916  set->xc_slow = NULL;
1917  nfft_free(set->temp);
1918  set->temp = NULL;
1919  }
1920 
1921  /* Free DPT set structure. */
1922  nfft_free(set);
1923 }

Generated on Tue Apr 30 2013 by Doxygen 1.8.1