44 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
47 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
50 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
53 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
55 #define N_TILDE(y) (y-1)
57 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
59 #define FPT_BREAK_EVEN 4
71 double **a11,**a12,**a21,**
a22;
109 double _Complex *temp;
110 double _Complex *work;
111 double _Complex *result;
112 double _Complex *vec3;
113 double _Complex *vec4;
130 static inline void abuvxpwy(
double a,
double b,
double _Complex* u,
double _Complex* x,
131 double* v,
double _Complex* y,
double* w,
int n)
133 int l;
double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
134 double *v_ptr = v, *w_ptr = w;
135 for (l = 0; l < n; l++)
136 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
139 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
140 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
141 double* v, double _Complex* y, double* w, int n) \
143 const int n2 = n>>1; \
144 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
145 double *v_ptr = v, *w_ptr = w; \
146 for (l = 0; l < n2; l++) \
147 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
149 for (l = 0; l < n2; l++) \
150 *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
153 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
154 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
156 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
157 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
158 double* v, double _Complex* y, int n, double *xx) \
160 const int n2 = n>>1; \
161 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
162 double *v_ptr = v, *xx_ptr = xx; \
163 for (l = 0; l < n2; l++, v_ptr++) \
164 *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
166 for (l = 0; l < n2; l++, v_ptr--) \
167 *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
170 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
171 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
173 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
174 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
175 double _Complex* y, double* w, int n, double *xx) \
177 const int n2 = n>>1; \
178 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
179 double *w_ptr = w, *xx_ptr = xx; \
180 for (l = 0; l < n2; l++, w_ptr++) \
181 *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
183 for (l = 0; l < n2; l++, w_ptr--) \
184 *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
187 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
188 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
190 static inline
void auvxpwy(
double a,
double _Complex* u,
double _Complex* x,
double* v,
191 double _Complex* y,
double* w,
int n)
194 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
195 double *v_ptr = v, *w_ptr = w;
196 for (l = n; l > 0; l--)
197 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
200 static inline void auvxpwy_symmetric(
double a,
double _Complex* u,
double _Complex* x,
201 double* v,
double _Complex* y,
double* w,
int n)
203 const int n2 = n>>1; \
205 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
206 double *v_ptr = v, *w_ptr = w;
207 for (l = n2; l > 0; l--)
208 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
210 for (l = n2; l > 0; l--)
211 *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
214 static inline void auvxpwy_symmetric_1(
double a,
double _Complex* u,
double _Complex* x,
215 double* v,
double _Complex* y,
double* w,
int n,
double *xx)
217 const int n2 = n>>1; \
219 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
220 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
221 for (l = n2; l > 0; l--, xx_ptr++)
222 *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
224 for (l = n2; l > 0; l--, xx_ptr++)
225 *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
228 static inline void auvxpwy_symmetric_2(
double a,
double _Complex* u,
double _Complex* x,
229 double* v,
double _Complex* y,
double* w,
int n,
double *xx)
231 const int n2 = n>>1; \
233 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
234 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
235 for (l = n2; l > 0; l--, xx_ptr++)
236 *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
238 for (l = n2; l > 0; l--, xx_ptr++)
239 *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
242 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
243 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
244 double *a21, double *a22, double g, int tau, fpt_set set) \
247 int length = 1<<(tau+1); \
249 double norm = 1.0/(length<<1); \
256 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
257 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
270 M2_FUNCTION(norm,b,b,a22,a,a21,length); \
276 M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
277 M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
278 memcpy(b,set->z,length*sizeof(double _Complex)); \
280 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
286 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
291 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
292 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
296 static inline
void fpt_do_step_symmetric_u(
double _Complex *a,
double _Complex *b,
297 double *a11,
double *a12,
double *a21,
double *a22,
double *x,
298 double gam,
int tau,
fpt_set set)
301 int length = 1<<(tau+1);
303 double norm = 1.0/(length<<1);
305 UNUSED(a21); UNUSED(a22);
312 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
313 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
326 auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
332 auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
333 auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
334 memcpy(b,set->z,length*
sizeof(
double _Complex));
336 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
342 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
347 static inline void fpt_do_step_symmetric_l(
double _Complex *a,
double _Complex *b,
348 double *a11,
double *a12,
double *a21,
double *a22,
double *x,
double gam,
int tau,
fpt_set set)
351 int length = 1<<(tau+1);
353 double norm = 1.0/(length<<1);
359 UNUSED(a11); UNUSED(a12);
362 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
363 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
376 auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
382 auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
383 auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
384 memcpy(b,set->z,length*
sizeof(
double _Complex));
386 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
392 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
397 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
398 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
399 double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
402 int length = 1<<(tau+1); \
404 double norm = 1.0/(length<<1); \
407 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
408 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
411 M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
412 M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
413 memcpy(a,set->z,length*sizeof(double _Complex)); \
416 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
417 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
420 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
421 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
426 static inline
void fpt_do_step_t_symmetric_u(
double _Complex *a,
427 double _Complex *b,
double *a11,
double *a12,
double *x,
428 double gam,
int tau,
fpt_set set)
431 int length = 1<<(tau+1);
433 double norm = 1.0/(length<<1);
436 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
437 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
440 abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
441 abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
442 memcpy(a,set->z,length*
sizeof(
double _Complex));
445 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
446 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
449 static inline void fpt_do_step_t_symmetric_l(
double _Complex *a,
450 double _Complex *b,
double *a21,
double *a22,
451 double *x,
double gam,
int tau,
fpt_set set)
454 int length = 1<<(tau+1);
456 double norm = 1.0/(length<<1);
459 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
460 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
463 abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
464 abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
465 memcpy(a,set->z,length*
sizeof(
double _Complex));
468 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
469 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
473 static void eval_clenshaw(
const double *x,
double *y,
int size,
int k,
const double *alpha,
474 const double *beta,
const double *gam)
480 double a,b,x_val_act,a_old;
483 const double *alpha_act, *beta_act, *gamma_act;
488 for (i = 0; i < size; i++)
500 alpha_act = &(alpha[k]);
501 beta_act = &(beta[k]);
502 gamma_act = &(gam[k]);
503 for (j = k; j > 1; j--)
506 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
507 b = a_old*(*gamma_act);
512 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
519 static void eval_clenshaw2(
const double *x,
double *z,
double *y,
int size1,
int size,
int k,
const double *alpha,
520 const double *beta,
const double *gam)
526 double a,b,x_val_act,a_old;
528 double *y_act, *z_act;
529 const double *alpha_act, *beta_act, *gamma_act;
535 for (i = 0; i < size; i++)
548 alpha_act = &(alpha[k]);
549 beta_act = &(beta[k]);
550 gamma_act = &(gam[k]);
551 for (j = k; j > 1; j--)
554 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
555 b = a_old*(*gamma_act);
562 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
575 static int eval_clenshaw_thresh2(
const double *x,
double *z,
double *y,
int size,
int k,
576 const double *alpha,
const double *beta,
const double *gam,
const
583 double a,b,x_val_act,a_old;
585 double *y_act, *z_act;
586 const double *alpha_act, *beta_act, *gamma_act;
587 R max = -nfft_float_property(NFFT_R_MAX);
588 const R t = LOG10(FABS(threshold));
594 for (i = 0; i < size; i++)
607 alpha_act = &(alpha[k]);
608 beta_act = &(beta[k]);
609 gamma_act = &(gam[k]);
610 for (j = k; j > 1; j--)
613 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
614 b = a_old*(*gamma_act);
620 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
621 max = FMAX(max,LOG10(FABS(*y_act)));
632 static inline void eval_sum_clenshaw_fast(
const int N,
const int M,
633 const double _Complex *a,
const double *x,
double _Complex *y,
634 const double *alpha,
const double *beta,
const double *gam,
638 double _Complex tmp1, tmp2, tmp3;
649 for (j = 0; j <= M; j++)
653 for (j = 0; j <= M; j++)
658 tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
659 for (k = N-1; k > 0; k--)
662 + (alpha[k-1] * xc + beta[k-1]) * tmp1
667 y[j] = lambda * tmp1;
672 for (k = N-1; k > 0; k--)
674 tmp3 = a[k-1] + tmp2 * gam[k];
675 tmp2 *= (alpha[k] * xc + beta[k]);
683 tmp2 *= (alpha[0] * xc + beta[0]);
685 y[j] = lambda * (tmp2 + tmp1);
716 double _Complex *y,
double _Complex *temp,
double *alpha,
double *beta,
double *gam,
720 double _Complex* it1 = temp;
721 double _Complex* it2 = y;
726 for (j = 0; j <= M; j++)
728 it2[j] = lambda * y[j];
736 for (j = 0; j <= M; j++)
739 it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
743 for (k = 2; k <= N; k++)
746 for (j = 0; j <= M; j++)
750 it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
758 fpt_set fpt_init(
const int M,
const int t,
const unsigned int flags)
768 int nthreads = nfft_get_omp_num_threads();
785 for (m = 0; m <
set->M; m++)
786 set->dpt[m].steps = 0;
796 set->xcvecs = (
double**)
nfft_malloc((set->t)*
sizeof(
double*));
800 for (tau = 1; tau < t+1; tau++)
803 set->xcvecs[tau-1] = (
double*)
nfft_malloc(plength*
sizeof(
double));
804 for (k = 0; k < plength; k++)
806 set->xcvecs[tau-1][k] = cos(((k+0.5)*
PI)/plength);
808 plength = plength << 1;
812 set->work = (
double _Complex*)
nfft_malloc((2*set->N)*
sizeof(
double _Complex));
813 set->result = (
double _Complex*)
nfft_malloc((2*set->N)*
sizeof(
double _Complex));
815 set->lengths = (
int*)
nfft_malloc((set->t)*
sizeof(int));
816 set->plans_dct2 = (fftw_plan*)
nfft_malloc(
sizeof(fftw_plan)*(
set->t));
817 set->kindsr = (fftw_r2r_kind*)
nfft_malloc(2*
sizeof(fftw_r2r_kind));
818 set->kindsr[0] = FFTW_REDFT10;
819 set->kindsr[1] = FFTW_REDFT10;
820 for (tau = 0, plength = 4; tau <
set->t; tau++, plength<<=1)
822 set->lengths[tau] = plength;
824 #pragma omp critical (nfft_omp_critical_fftw_plan)
826 fftw_plan_with_nthreads(nthreads);
828 set->plans_dct2[tau] =
829 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (
double*)set->work, NULL,
830 2, 1, (
double*)set->result, NULL, 2, 1,set->kindsr,
838 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
841 set->vec3 = (
double _Complex*)
nfft_malloc(set->N*
sizeof(
double _Complex));
842 set->vec4 = (
double _Complex*)
nfft_malloc(set->N*
sizeof(
double _Complex));
843 set->z = (
double _Complex*)
nfft_malloc(set->N*
sizeof(
double _Complex));
846 set->plans_dct3 = (fftw_plan*)
nfft_malloc(
sizeof(fftw_plan)*(
set->t));
847 set->kinds = (fftw_r2r_kind*)
nfft_malloc(2*
sizeof(fftw_r2r_kind));
848 set->kinds[0] = FFTW_REDFT01;
849 set->kinds[1] = FFTW_REDFT01;
850 for (tau = 0, plength = 4; tau <
set->t; tau++, plength<<=1)
852 set->lengths[tau] = plength;
854 #pragma omp critical (nfft_omp_critical_fftw_plan)
856 fftw_plan_with_nthreads(nthreads);
858 set->plans_dct3[tau] =
859 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (
double*)set->work, NULL,
860 2, 1, (
double*)set->result, NULL, 2, 1, set->kinds,
874 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
876 set->xc_slow = (
double*)
nfft_malloc((set->N+1)*
sizeof(double));
877 set->temp = (
double _Complex*)
nfft_malloc((set->N+1)*
sizeof(
double _Complex));
884 void fpt_precompute(
fpt_set set,
const int m,
double *alpha,
double *beta,
885 double *gam,
int k_start,
const double threshold)
908 const double *calpha;
910 const double *cgamma;
921 data = &(
set->dpt[m]);
924 if (data->
steps != NULL)
933 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
940 for (tau = 2; tau <=
set->t; tau++)
943 data->
alphaN[tau-2] = alpha[1<<tau];
944 data->
betaN[tau-2] = beta[1<<tau];
945 data->
gammaN[tau-2] = gam[1<<tau];
953 N_tilde = N_TILDE(set->N);
960 for (tau = 1; tau <
set->t; tau++)
965 firstl =
FIRST_L(k_start_tilde,plength);
967 lastl =
LAST_L(N_tilde,plength);
975 for (l = firstl; l <= lastl; l++)
977 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
989 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
990 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
991 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
992 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
998 calpha = &(alpha[plength*l+1+1]);
999 cbeta = &(beta[plength*l+1+1]);
1000 cgamma = &(gam[plength*l+1+1]);
1002 if (set->flags & FPT_NO_STABILIZATION)
1008 eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
1010 eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
1020 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
1021 degree-1, calpha, cbeta, cgamma, threshold);
1025 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
1026 degree, calpha, cbeta, cgamma, threshold);
1040 data->
steps[tau][l].a11[0] = a11;
1041 data->
steps[tau][l].a12[0] = a12;
1042 data->
steps[tau][l].a21[0] = a21;
1044 data->
steps[tau][l].
g[0] = gam[plength*l+1+1];
1050 degree_stab = degree*(2*l+1);
1051 X(next_power_of_2_exp)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
1065 plength_stab = N_stab;
1067 if (set->flags & FPT_AL_SYMMETRY)
1072 clength_1 = plength_stab;
1073 clength_2 = plength_stab;
1075 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1076 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1077 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1078 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1080 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1081 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
1082 clength_2, degree_stab-1, calpha, cbeta, cgamma);
1083 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
1084 clength_2, degree_stab+0, calpha, cbeta, cgamma);
1088 clength = plength_stab/2;
1091 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1092 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1095 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
1096 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
1097 degree_stab-2, calpha, cbeta, cgamma);
1098 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
1099 degree_stab-1, calpha, cbeta, cgamma);
1105 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1106 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1107 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1108 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
1109 degree_stab-1,calpha, cbeta, cgamma);
1110 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
1111 degree_stab+0, calpha, cbeta, cgamma);
1117 clength_1 = plength_stab;
1118 clength_2 = plength_stab;
1119 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1120 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1121 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1122 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1123 calpha = &(alpha[2]);
1129 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
1130 calpha, cbeta, cgamma);
1131 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
1132 calpha, cbeta, cgamma);
1135 data->
steps[tau][l].a11[0] = a11;
1136 data->
steps[tau][l].a12[0] = a12;
1137 data->
steps[tau][l].a21[0] = a21;
1140 data->
steps[tau][l].
g[0] = gam[1+1];
1142 data->
steps[tau][l].
ts = t_stab;
1143 data->
steps[tau][l].
Ns = N_stab;
1147 plength = plength << 1;
1151 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1154 if (set->flags & FPT_PERSISTENT_DATA)
1156 data->
_alpha = (
double*) alpha;
1157 data->
_beta = (
double*) beta;
1158 data->
_gamma = (
double*) gam;
1165 memcpy(data->
_alpha,alpha,(set->N+1)*
sizeof(
double));
1166 memcpy(data->
_beta,beta,(set->N+1)*
sizeof(
double));
1167 memcpy(data->
_gamma,gam,(set->N+1)*
sizeof(
double));
1172 void fpt_trafo_direct(
fpt_set set,
const int m,
const double _Complex *x,
double _Complex *y,
1173 const int k_end,
const unsigned int flags)
1183 X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
1188 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
1193 if (flags & FPT_FUNCTION_VALUES)
1196 for (j = 0; j <= k_end; j++)
1198 set->xc_slow[j] = cos((
PI*(j+0.5))/(k_end+1));
1202 memset(set->result,0U,data->
k_start*
sizeof(
double _Complex));
1203 memcpy(&set->result[data->
k_start],x,(k_end-data->
k_start+1)*
sizeof(
double _Complex));
1208 eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
1213 memset(set->temp,0U,data->
k_start*
sizeof(
double _Complex));
1214 memcpy(&set->temp[data->
k_start],x,(k_end-data->
k_start+1)*
sizeof(
double _Complex));
1216 eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
1220 fftw_execute_r2r(set->plans_dct2[tk-2],(
double*)set->result,
1221 (
double*)set->result);
1223 set->result[0] *= 0.5;
1224 for (j = 0; j < Nk; j++)
1226 set->result[j] *= norm;
1229 memcpy(y,set->result,(k_end+1)*
sizeof(
double _Complex));
1233 void fpt_trafo(
fpt_set set,
const int m,
const double _Complex *x,
double _Complex *y,
1234 const int k_end,
const unsigned int flags)
1264 int length = k_end+1;
1265 fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
1270 double _Complex *work_ptr;
1271 const double _Complex *x_ptr;
1274 if (k_end < FPT_BREAK_EVEN)
1277 fpt_trafo_direct(
set, m, x, y, k_end, flags);
1281 X(next_power_of_2_exp)(k_end,&Nk,&tk);
1286 if (set->flags & FPT_NO_FAST_ALGORITHM)
1289 if (flags & FPT_FUNCTION_VALUES)
1292 int nthreads = nfft_get_omp_num_threads();
1293 #pragma omp critical (nfft_omp_critical_fftw_plan)
1295 fftw_plan_with_nthreads(nthreads);
1297 plan = fftw_plan_many_r2r(1, &length, 2, (
double*)set->work, NULL, 2, 1,
1298 (
double*)set->work, NULL, 2, 1, kinds, 0U);
1305 memset(set->result,0U,2*Nk*
sizeof(
double _Complex));
1310 memset(set->work,0U,2*data->
k_start*
sizeof(
double _Complex));
1312 work_ptr = &
set->work[2*data->
k_start];
1315 for (k = 0; k <= k_end_tilde-data->
k_start; k++)
1317 *work_ptr++ = *x_ptr++;
1318 *work_ptr++ = K(0.0);
1322 memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*
sizeof(
double _Complex));
1328 set->work[2*(Nk-2)] += data->
gammaN[tk-2]*x[Nk-data->
k_start];
1329 set->work[2*(Nk-1)] += data->
betaN[tk-2]*x[Nk-data->
k_start];
1330 set->work[2*(Nk-1)+1] = data->
alphaN[tk-2]*x[Nk-data->
k_start];
1335 for (tau = 1; tau < tk; tau++)
1338 firstl =
FIRST_L(k_start_tilde,plength);
1340 lastl =
LAST_L(k_end_tilde,plength);
1343 for (l = firstl; l <= lastl; l++)
1346 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*
sizeof(
double _Complex));
1347 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*
sizeof(
double _Complex));
1348 memset(&set->vec3[plength/2],0U,(plength/2)*
sizeof(
double _Complex));
1349 memset(&set->vec4[plength/2],0U,(plength/2)*
sizeof(
double _Complex));
1352 memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*
sizeof(
double _Complex));
1353 memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*
sizeof(
double _Complex));
1354 memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*
sizeof(
double _Complex));
1357 step = &(data->
steps[tau][l]);
1363 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
1372 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
1373 step->a12[0], step->a21[0], step->
a22[0], step->
g[0], tau,
set);
1378 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1379 step->a21[0], step->
a22[0], step->
g[0], tau,
set);
1382 if (step->
g[0] != 0.0)
1384 for (k = 0; k < plength; k++)
1386 set->work[plength*2*l+k] +=
set->vec3[k];
1389 for (k = 0; k < plength; k++)
1391 set->work[plength*(2*l+1)+k] += set->vec4[k];
1399 plength_stab = step->
Ns;
1412 memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*
sizeof(
double _Complex));
1413 memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*
sizeof(
double _Complex));
1417 if (set->flags & FPT_AL_SYMMETRY)
1421 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1422 step->a21[0], step->
a22[0], step->
g[0], t_stab-1,
set);
1428 fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1429 step->a21[0], step->
a22[0],
1430 set->xcvecs[t_stab-2], step->
g[0], t_stab-1,
set);
1438 fpt_do_step_symmetric_l(set->vec3, set->vec4,
1439 step->a11[0], step->a12[0],
1441 step->
a22[0], set->xcvecs[t_stab-2], step->
g[0], t_stab-1,
set);
1448 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1449 step->a21[0], step->
a22[0], step->
g[0], t_stab-1,
set);
1452 if (step->
g[0] != 0.0)
1454 for (k = 0; k < plength_stab; k++)
1456 set->result[k] +=
set->vec3[k];
1460 for (k = 0; k < plength_stab; k++)
1462 set->result[Nk+k] +=
set->vec4[k];
1467 plength = plength<<1;
1481 for (k = 0; k < 2*Nk; k++)
1483 set->result[k] +=
set->work[k];
1488 y[0] = data->
gamma_m1*(
set->result[0] + data->
beta_0*
set->result[Nk] +
1489 data->
alpha_0*
set->result[Nk+1]*0.5);
1490 y[1] = data->
gamma_m1*(
set->result[1] + data->
beta_0*
set->result[Nk+1]+
1491 data->
alpha_0*(
set->result[Nk]+
set->result[Nk+2]*0.5));
1492 y[k_end-1] = data->
gamma_m1*(
set->result[k_end-1] +
1493 data->
beta_0*
set->result[Nk+k_end-1] +
1494 data->
alpha_0*
set->result[Nk+k_end-2]*0.5);
1495 y[k_end] = data->
gamma_m1*(0.5*data->
alpha_0*
set->result[Nk+k_end-1]);
1496 for (k = 2; k <= k_end-2; k++)
1498 y[k] = data->
gamma_m1*(
set->result[k] + data->
beta_0*
set->result[Nk+k] +
1499 data->
alpha_0*0.5*(
set->result[Nk+k-1]+
set->result[Nk+k+1]));
1502 if (flags & FPT_FUNCTION_VALUES)
1505 fftw_execute_r2r(plan,(
double*)y,(
double*)y);
1506 #pragma omp critical (nfft_omp_critical_fftw_plan)
1507 fftw_destroy_plan(plan);
1508 for (k = 0; k <= k_end; k++)
1515 void fpt_transposed_direct(
fpt_set set,
const int m,
double _Complex *x,
1516 double _Complex *y,
const int k_end,
const unsigned int flags)
1524 X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
1527 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
1532 if (flags & FPT_FUNCTION_VALUES)
1534 for (j = 0; j <= k_end; j++)
1536 set->xc_slow[j] = cos((
PI*(j+0.5))/(k_end+1));
1544 sizeof(
double _Complex));
1548 memcpy(set->result,y,(k_end+1)*
sizeof(
double _Complex));
1549 memset(&set->result[k_end+1],0U,(Nk-k_end-1)*
sizeof(
double _Complex));
1551 for (j = 0; j < Nk; j++)
1553 set->result[j] *= norm;
1556 fftw_execute_r2r(set->plans_dct3[tk-2],(
double*)set->result,
1557 (
double*)set->result);
1563 memcpy(x,&set->temp[data->
k_start],(k_end-data->
k_start+1)*
sizeof(
double _Complex));
1567 void fpt_transposed(
fpt_set set,
const int m,
double _Complex *x,
1568 double _Complex *y,
const int k_end,
const unsigned int flags)
1597 int length = k_end+1;
1598 fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
1604 if (k_end < FPT_BREAK_EVEN)
1607 fpt_transposed_direct(
set, m, x, y, k_end, flags);
1611 X(next_power_of_2_exp)(k_end,&Nk,&tk);
1616 if (set->flags & FPT_NO_FAST_ALGORITHM)
1621 if (flags & FPT_FUNCTION_VALUES)
1624 int nthreads = nfft_get_omp_num_threads();
1625 #pragma omp critical (nfft_omp_critical_fftw_plan)
1627 fftw_plan_with_nthreads(nthreads);
1629 plan = fftw_plan_many_r2r(1, &length, 2, (
double*)set->work, NULL, 2, 1,
1630 (
double*)set->work, NULL, 2, 1, kinds, 0U);
1634 fftw_execute_r2r(plan,(
double*)y,(
double*)set->result);
1635 #pragma omp critical (nfft_omp_critical_fftw_plan)
1636 fftw_destroy_plan(plan);
1637 for (k = 0; k <= k_end; k++)
1639 set->result[k] *= 0.5;
1644 memcpy(set->result,y,(k_end+1)*
sizeof(
double _Complex));
1648 memset(set->work,0U,2*Nk*
sizeof(
double _Complex));
1651 for (k = 0; k <= k_end; k++)
1653 set->work[k] = data->
gamma_m1*
set->result[k];
1658 data->
alpha_0*
set->result[1]);
1659 for (k = 1; k < k_end; k++)
1662 data->
alpha_0*0.5*(
set->result[k-1]+
set->result[k+1]));
1666 memset(&set->work[k_end],0U,(Nk-k_end)*
sizeof(
double _Complex));
1670 memcpy(set->result,set->work,2*Nk*
sizeof(
double _Complex));
1674 for (tau = tk-1; tau >= 1; tau--)
1677 firstl =
FIRST_L(k_start_tilde,plength);
1679 lastl =
LAST_L(k_end_tilde,plength);
1682 for (l = firstl; l <= lastl; l++)
1685 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*
sizeof(
double _Complex));
1686 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*
sizeof(
double _Complex));
1688 memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
1689 (plength/2)*
sizeof(
double _Complex));
1692 step = &(data->
steps[tau][l]);
1697 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
1700 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1701 step->a21[0], step->
a22[0], step->
g[0], tau,
set);
1706 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1707 step->a21[0], step->
a22[0], step->
g[0], tau,
set);
1709 memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*
sizeof(
double _Complex));
1711 for (k = 0; k < plength; k++)
1713 set->work[plength*(4*l+2)/2+k] = set->vec3[k];
1719 plength_stab = step->
Ns;
1722 memcpy(set->vec3,set->result,plength_stab*
sizeof(
double _Complex));
1723 memcpy(set->vec4,&(set->result[Nk]),plength_stab*
sizeof(
double _Complex));
1726 if (set->flags & FPT_AL_SYMMETRY)
1730 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1731 step->a21[0], step->
a22[0], step->
g[0], t_stab-1,
set);
1735 fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1736 set->xcvecs[t_stab-2], step->
g[0], t_stab-1,
set);
1740 fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
1741 step->a21[0], step->
a22[0], set->xcvecs[t_stab-2], step->
g[0], t_stab-1,
set);
1746 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1747 step->a21[0], step->
a22[0], step->
g[0], t_stab-1,
set);
1750 memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*
sizeof(
double _Complex));
1752 for (k = 0; k < plength; k++)
1754 set->work[(plength/2)*(4*l+2)+k] =
set->vec3[k];
1759 plength = plength>>1;
1763 for (k = 0; k <= k_end_tilde-data->
k_start; k++)
1765 x[k] =
set->work[2*(data->
k_start+k)];
1770 data->
gammaN[tk-2]*
set->work[2*(Nk-2)]
1771 + data->
betaN[tk-2] *set->work[2*(Nk-1)]
1772 + data->
alphaN[tk-2]*
set->work[2*(Nk-1)+1];
1776 void fpt_finalize(
fpt_set set)
1786 const int M =
set->M;
1789 for (m = 0; m < M; m++)
1792 data = &
set->dpt[m];
1805 N_tilde = N_TILDE(set->N);
1807 for (tau = 1; tau <
set->t; tau++)
1810 firstl =
FIRST_L(k_start_tilde,plength);
1812 lastl =
LAST_L(N_tilde,plength);
1815 for (l = firstl; l <= lastl; l++)
1822 data->
steps[tau][l].a11[0] = NULL;
1823 data->
steps[tau][l].a12[0] = NULL;
1824 data->
steps[tau][l].a21[0] = NULL;
1832 data->
steps[tau][l].a11 = NULL;
1833 data->
steps[tau][l].a12 = NULL;
1834 data->
steps[tau][l].a21 = NULL;
1836 data->
steps[tau][l].
g = NULL;
1840 data->
steps[tau] = NULL;
1842 plength = plength<<1;
1849 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1853 if (!(set->flags & FPT_PERSISTENT_DATA))
1869 for (tau = 1; tau <
set->t+1; tau++)
1872 set->xcvecs[tau-1] = NULL;
1882 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
1895 for(tau = 0; tau <
set->t; tau++)
1897 #pragma omp critical (nfft_omp_critical_fftw_plan)
1899 fftw_destroy_plan(set->plans_dct3[tau]);
1900 fftw_destroy_plan(set->plans_dct2[tau]);
1902 set->plans_dct3[tau] = NULL;
1903 set->plans_dct2[tau] = NULL;
1908 set->plans_dct3 = NULL;
1909 set->plans_dct2 = NULL;
1912 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1916 set->xc_slow = NULL;