65 #define NFSFT_DEFAULT_NFFT_CUTOFF 6
72 #define NFSFT_DEFAULT_THRESHOLD 1000
79 #define NFSFT_BREAK_EVEN 5
115 double _Complex last;
125 memset(plan->
f_hat_intern,0U,(2*plan->
N+2)*
sizeof(
double _Complex));
128 lowe = -plan->
N + (plan->
N%2);
132 for (n = lowe; n <= upe; n += 2)
138 for(k = 1; k <= plan->
N; k++)
149 low = -plan->
N + (1-plan->
N%2);
154 for (n = low; n <= up; n += 2)
166 *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
168 for (k = plan->
N-1; k > 0; k--)
171 *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
193 double _Complex last;
203 lowe = -plan->
N + (plan->
N%2);
207 for (n = lowe; n <= upe; n += 2)
211 xm = &(plan->
f_hat[NFSFT_INDEX(-1,n,plan)]);
212 xp = &(plan->
f_hat[NFSFT_INDEX(+1,n,plan)]);
213 for(k = 1; k <= plan->
N; k++)
221 low = -plan->
N + (1-plan->
N%2);
225 for (n = low; n <= up; n += 2)
229 xm = &(plan->
f_hat[NFSFT_INDEX(-1,n,plan)]);
230 xp = &(plan->
f_hat[NFSFT_INDEX(+1,n,plan)]);
231 for(k = 1; k <= plan->
N; k++)
236 plan->
f_hat[NFSFT_INDEX(0,n,plan)] =
237 -0.25*_Complex_I*plan->
f_hat[NFSFT_INDEX(1,n,plan)];
238 last = plan->
f_hat[NFSFT_INDEX(1,n,plan)];
239 plan->
f_hat[NFSFT_INDEX(1,n,plan)] =
240 -0.25*_Complex_I*plan->
f_hat[NFSFT_INDEX(2,n,plan)];
242 xp = &(plan->
f_hat[NFSFT_INDEX(2,n,plan)]);
243 for (k = 2; k < plan->
N; k++)
246 *xp = -0.25 * _Complex_I * (xp[1] - last);
250 *xp = 0.25 * _Complex_I * last;
252 plan->
f_hat[NFSFT_INDEX(0,n,plan)] *= 2.0;
256 void nfsft_init(
nfsft_plan *plan,
int N,
int M)
259 nfsft_init_advanced(plan, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
263 void nfsft_init_advanced(
nfsft_plan* plan,
int N,
int M,
267 nfsft_init_guru(plan, N, M, flags, PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
271 void nfsft_init_guru(
nfsft_plan *plan,
int N,
int M,
unsigned int flags,
272 unsigned int nfft_flags,
int nfft_cutoff)
290 plan->
N_total = (2*plan->
N+2)*(2*plan->
N+2);
294 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
297 sizeof(
double _Complex));
301 if (plan->
flags & NFSFT_MALLOC_F_HAT)
304 sizeof(
double _Complex));
308 if (plan->
flags & NFSFT_MALLOC_F)
314 if (plan->
flags & NFSFT_MALLOC_X)
320 if (plan->
flags & NFSFT_NO_FAST_ALGORITHM)
329 nfft_size[0] = 2*plan->
N+2;
330 nfft_size[1] = 2*plan->
N+2;
331 fftw_size[0] = 4*plan->
N;
332 fftw_size[1] = 4*plan->
N;
336 nfft_cutoff, nfft_flags,
337 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
356 plan->
mv_trafo = (void (*) (
void* ))nfsft_trafo;
357 plan->
mv_adjoint = (void (*) (
void* ))nfsft_adjoint;
360 void nfsft_precompute(
int N,
double kappa,
unsigned int nfsft_flags,
361 unsigned int fpt_flags)
372 #pragma omp parallel default(shared)
374 int nthreads = omp_get_num_threads();
375 int threadid = omp_get_thread_num();
378 wisdom.nthreads = nthreads;
384 wisdom.flags = nfsft_flags;
388 X(next_power_of_2_exp)(N,&wisdom.
N_MAX,&wisdom.
T_MAX);
391 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
415 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
423 if (wisdom.
alpha != NULL)
426 #pragma omp parallel default(shared) private(n)
428 int nthreads = omp_get_num_threads();
429 int threadid = omp_get_thread_num();
432 wisdom.nthreads = nthreads;
436 wisdom.set_threads[threadid] = fpt_init(wisdom.
N_MAX+1, wisdom.
T_MAX,
437 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
438 for (n = 0; n <= wisdom.
N_MAX; n++)
439 fpt_precompute(wisdom.set_threads[threadid],n,&wisdom.
alpha[ROW(n)],
440 &wisdom.
beta[ROW(n)], &wisdom.
gamma[ROW(n)],n,kappa);
447 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
448 for (n = 0; n <= wisdom.
N_MAX; n++)
453 fpt_precompute(wisdom.
set,n,&wisdom.
alpha[ROW(n)],&wisdom.
beta[ROW(n)],
454 &wisdom.
gamma[ROW(n)],n,kappa);
461 #pragma omp parallel default(shared) private(n)
464 int nthreads = omp_get_num_threads();
465 int threadid = omp_get_thread_num();
468 wisdom.nthreads = nthreads;
475 wisdom.set_threads[threadid] = fpt_init(wisdom.
N_MAX+1, wisdom.
T_MAX,
476 fpt_flags | FPT_AL_SYMMETRY);
478 for (n = 0; n <= wisdom.
N_MAX; n++)
480 alpha_al_row(alpha,wisdom.
N_MAX,n);
481 beta_al_row(beta,wisdom.
N_MAX,n);
482 gamma_al_row(gamma,wisdom.
N_MAX,n);
485 fpt_precompute(wisdom.set_threads[threadid],n,alpha,beta,gamma,n,
499 fpt_flags | FPT_AL_SYMMETRY);
500 for (n = 0; n <= wisdom.
N_MAX; n++)
529 void nfsft_forget(
void)
539 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
554 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
561 for (k = 0; k < wisdom.nthreads; k++)
562 fpt_finalize(wisdom.set_threads[k]);
566 fpt_finalize(wisdom.
set);
585 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
591 if (plan->
flags & NFSFT_MALLOC_F_HAT)
598 if (plan->
flags & NFSFT_MALLOC_F)
605 if (plan->
flags & NFSFT_MALLOC_X)
627 double _Complex temp;
639 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
645 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
648 sizeof(
double _Complex));
658 if (plan->
flags & NFSFT_NORMALIZED)
661 #pragma omp parallel for default(shared) private(k,n)
662 for (k = 0; k <= plan->
N; k++)
664 for (n = -k; n <= k; n++)
668 sqrt((2*k+1)/(4.0*
PI));
679 for (m = 0; m < plan->
M_total; m++)
693 #pragma omp parallel for default(shared) private(m,stheta,sphi,f_m,n,a,n_abs,alpha,gamma,it2,it1,k,temp)
694 for (m = 0; m < plan->
M_total; m++)
697 stheta = cos(2.0*
PI*plan->
x[2*m+1]);
699 sphi = 2.0*
PI*plan->
x[2*m];
708 for (n = -plan->
N; n <= plan->N; n++)
717 alpha = &(wisdom.
alpha[ROW(n_abs)]);
718 gamma = &(wisdom.
gamma[ROW(n_abs)]);
723 for (k = plan->
N; k > n_abs + 1; k--)
725 temp = a[k-2] + it2 * gamma[k];
726 it2 = it1 + it2 * alpha[k] * stheta;
733 it2 = it1 + it2 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
742 f_m += it2 * wisdom.
gamma[ROW(n_abs)] *
743 pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
766 double _Complex temp;
776 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
782 memset(plan->
f_hat,0U,plan->
N_total*
sizeof(
double _Complex));
790 for (m = 0; m < plan->
M_total; m++)
792 plan->
f_hat[NFSFT_INDEX(0,0,plan)] += plan->
f[m];
801 #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,stheta,sphi,it2,it1,k,temp)
802 for (n = -plan->
N; n <= plan->N; n++)
808 alpha = &(wisdom.
alpha[ROW(n_abs)]);
809 gamma = &(wisdom.
gamma[ROW(n_abs)]);
812 for (m = 0; m < plan->
M_total; m++)
815 stheta = cos(2.0*
PI*plan->
x[2*m+1]);
817 sphi = 2.0*
PI*plan->
x[2*m];
822 it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
823 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
824 plan->
f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
829 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
830 plan->
f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
834 for (k = n_abs+2; k <= plan->
N; k++)
837 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
839 plan->
f_hat[NFSFT_INDEX(k,n,plan)] += it2;
845 for (m = 0; m < plan->
M_total; m++)
848 stheta = cos(2.0*
PI*plan->
x[2*m+1]);
850 sphi = 2.0*
PI*plan->
x[2*m];
853 for (n = -plan->
N; n <= plan->N; n++)
859 alpha = &(wisdom.
alpha[ROW(n_abs)]);
860 gamma = &(wisdom.
gamma[ROW(n_abs)]);
865 it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
866 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
867 plan->
f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
872 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
873 plan->
f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
877 for (k = n_abs+2; k <= plan->
N; k++)
880 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
882 plan->
f_hat[NFSFT_INDEX(k,n,plan)] += it2;
892 if (plan->
flags & NFSFT_NORMALIZED)
895 #pragma omp parallel for default(shared) private(k,n)
896 for (k = 0; k <= plan->
N; k++)
898 for (n = -k; n <= k; n++)
901 plan->
f_hat[NFSFT_INDEX(k,n,plan)] *=
902 sqrt((2*k+1)/(4.0*
PI));
908 if (plan->
flags & NFSFT_ZERO_F_HAT)
910 for (n = -plan->
N; n <= plan->N+1; n++)
912 memset(&plan->
f_hat[NFSFT_INDEX(-plan->
N-1,n,plan)],0U,
913 (plan->
N+1+abs(n))*
sizeof(
double _Complex));
926 double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
940 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
957 nfsft_trafo_direct(plan);
961 else if (plan->
N <= wisdom.
N_MAX)
964 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
967 sizeof(
double _Complex));
984 if (plan->
flags & NFSFT_NORMALIZED)
987 #pragma omp parallel for default(shared) private(k,n)
988 for (k = 0; k <= plan->
N; k++)
990 for (n = -k; n <= k; n++)
994 sqrt((2*k+1)/(4.0*
PI));
1003 if (plan->
flags & NFSFT_USE_DPT)
1006 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1007 for (n = -plan->
N; n <= plan->N; n++)
1008 fpt_trafo_direct(wisdom.set_threads[omp_get_thread_num()],abs(n),
1014 for (n = -plan->
N; n <= plan->N; n++)
1018 fpt_trafo_direct(wisdom.
set,abs(n),
1028 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1029 for (n = -plan->
N; n <= plan->N; n++)
1030 fpt_trafo(wisdom.set_threads[omp_get_thread_num()],abs(n),
1036 for (n = -plan->
N; n <= plan->N; n++)
1040 fpt_trafo(wisdom.
set,abs(n),
1068 if (plan->
flags & NFSFT_USE_NDFT)
1100 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
1117 nfsft_adjoint_direct(plan);
1120 else if (plan->
N <= wisdom.
N_MAX)
1137 if (plan->
flags & NFSFT_USE_NDFT)
1173 if (plan->
flags & NFSFT_USE_DPT)
1176 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1177 for (n = -plan->
N; n <= plan->N; n++)
1178 fpt_transposed_direct(wisdom.set_threads[omp_get_thread_num()],abs(n),
1179 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1180 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1184 for (n = -plan->
N; n <= plan->N; n++)
1188 fpt_transposed_direct(wisdom.
set,abs(n),
1189 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1190 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1198 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1199 for (n = -plan->
N; n <= plan->N; n++)
1200 fpt_transposed(wisdom.set_threads[omp_get_thread_num()],abs(n),
1201 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1202 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1207 for (n = -plan->
N; n <= plan->N; n++)
1211 fpt_transposed(wisdom.
set,abs(n),
1212 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1213 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1226 if (plan->
flags & NFSFT_NORMALIZED)
1231 #pragma omp parallel for default(shared) private(k,n)
1232 for (k = 0; k <= plan->
N; k++)
1234 for (n = -k; n <= k; n++)
1237 plan->
f_hat[NFSFT_INDEX(k,n,plan)] *=
1238 sqrt((2*k+1)/(4.0*
PI));
1244 if (plan->
flags & NFSFT_ZERO_F_HAT)
1248 for (n = -plan->
N; n <= plan->N+1; n++)
1250 memset(&plan->
f_hat[NFSFT_INDEX(-plan->
N-1,n,plan)],0U,
1251 (plan->
N+1+abs(n))*
sizeof(
double _Complex));
1266 nfft_precompute_one_psi(&plan->
plan_nfft);