34 #if defined(NFFT_SINGLE)
35 #define X(name) CONCAT(nfctf_, name)
36 #elif defined(NFFT_LDOUBLE)
37 #define X(name) CONCAT(nfctl_, name)
39 #define X(name) CONCAT(nfct_, name)
42 static inline int fftw_2N(
int n)
47 static inline int fftw_2N_rev(
int n)
49 return (LRINT(K(0.5) * n) + 1);
52 static inline int prod_int(
int *vec,
int d)
56 for (t = 0; t < d; t++)
63 #define NFCT_DEFAULT_FLAGS PRE_PHI_HUT | PRE_PSI | MALLOC_X | MALLOC_F_HAT | \
64 MALLOC_F | FFTW_INIT | FFT_OUT_OF_PLACE
66 #define FFTW_DEFAULT_FLAGS FFTW_ESTIMATE | FFTW_DESTROY_INPUT
68 #define NFCT_SUMMANDS (2 * ths->m + 2)
69 #define NODE(p,r) (ths->x[(p) * ths->d + (r)])
71 #define MACRO_ndct_init_result_trafo \
72 memset(f, 0, ths->M_total * sizeof(R));
73 #define MACRO_ndct_init_result_adjoint \
74 memset(f_hat, 0, ths->N_total * sizeof(R));
76 #define MACRO_nfct_D_init_result_A \
77 memset(g_hat, 0, prod_int(ths->n, ths->d) * sizeof(R));
78 #define MACRO_nfct_D_init_result_T \
79 memset(f_hat, 0, ths->N_total * sizeof(R));
81 #define MACRO_nfct_B_init_result_A \
82 memset(f, 0, ths->M_total * sizeof(R));
83 #define MACRO_nfct_B_init_result_T \
84 memset(g, 0, prod_int(ths->n, ths->d) * sizeof(R));
86 #define NFCT_PRE_WINFUN(d) ths->N[d] = 2 * ths->N[d]; \
87 ths->n[d] = fftw_2N(ths->n[d]);
89 #define NFCT_POST_WINFUN(d) ths->N[d] = LRINT(K(0.5) * ths->N[d]); \
90 ths->n[d] = fftw_2N_rev(ths->n[d]);
92 #define NFCT_WINDOW_HELP_INIT WINDOW_HELP_INIT
94 R X(phi_hut)(X(plan) *ths,
int k,
int d)
97 R phi_hut_tmp = PHI_HUT(k, d);
103 R X(phi)(X(plan) *ths, R x,
int d)
106 R phi_tmp = PHI(x, d);
112 #define MACRO_with_cos_vec cos_vec[t][ka[t]]
113 #define MACRO_without_cos_vec COS(K(2.0) * KPI * ka[t] * NODE(j,t))
115 #define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]];
116 #define MACRO_compute_PHI_HUT_INV (K(1.0) / (X(phi_hut)(ths, kg[t], t)))
118 #define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * NFCT_SUMMANDS + lc[t]]
119 #define MACRO_compute_PSI X(phi)(ths, NODE(j,t) - ((R)(lc[t] + lb[t])) / (K(2.0)*((R)(ths->n[t])-K(1.0))), t)
136 #define MACRO_ndct_malloc__cos_vec \
138 cos_vec = (R**)Y(malloc)(ths->d * sizeof(R*)); \
139 for (t = 0; t < ths->d; t++) \
140 cos_vec[t] = (R*)Y(malloc)(ths->N[t] * sizeof(R));
142 #define MACRO_ndct_free__cos_vec \
145 for (t = 0; t < ths->d; t++) \
146 Y(free)(cos_vec[t]); \
150 #define MACRO_ndct_init__cos_vec \
152 for(t = 0; t < ths->d; t++) \
154 cos_vec[t][0] = K(1.0); \
155 cos_vec[t][1] = COS(K(2.0) * KPI * NODE(j,t)); \
156 for (k = 2; k < ths->N[t]; k++) \
157 cos_vec[t][k] = K(2.0) * cos_vec[t][1] * cos_vec[t][k-1] - \
162 #define MACRO_ndct_init__k__cos_k(which_one) \
165 for (t = 0; t < ths->d; t++) \
168 for (t = 0; t < ths->d; t++) \
170 cos_k[t+1] = cos_k[t] * MACRO_ ##which_one; \
174 #define MACRO_ndct_count__k__cos_k(which_one) \
178 while ((ka[i] == ths->N[i]) && (i > 0)) \
184 for (t = i; t < ths->d; t++) \
185 cos_k[t+1] = cos_k[t] * MACRO_ ##which_one; \
188 #define MACRO_ndct_compute__trafo \
190 f[j] += f_hat[k] * cos_k[ths->d]; \
193 #define MACRO_ndct_compute__adjoint \
195 f_hat[k] += f[j] * cos_k[ths->d]; \
199 #define MACRO_ndct(which_one) \
200 void X(which_one ## _direct) (X(plan) *ths) \
206 R *f_hat = ths->f_hat; \
208 MACRO_ndct_init_result_ ## which_one; \
210 for (j = 0; j < ths->M_total; j++) \
212 for (k = 0; k < ths->N[0]; k++) \
214 cos_k[ths->d] = COS(K(2.0) * KPI * k * NODE(j,0)); \
215 MACRO_ndct_compute__ ## which_one; \
221 MACRO_ndct_malloc__cos_vec; \
223 for (j = 0; j < ths->M_total; j++) \
225 MACRO_ndct_init__cos_vec; \
226 MACRO_ndct_init__k__cos_k(with_cos_vec); \
228 for (k = 0; k < ths->N_total; k++) \
230 MACRO_ndct_compute__ ## which_one; \
231 MACRO_ndct_count__k__cos_k(with_cos_vec); \
234 MACRO_ndct_free__cos_vec; \
255 #define MACRO_nfct__lower_boundary(j,act_dim) \
258 LRINT(NODE((j),(act_dim)) * fftw_2N(ths->n[(act_dim)])) - ths->m; \
261 #define MACRO_nfct_D_compute_A \
263 g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \
266 #define MACRO_nfct_D_compute_T \
268 f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \
271 #define MACRO_init__kg \
273 for (t = 0; t < ths->d; t++) \
278 #define MACRO_count__kg \
283 while ((kg[i] == ths->N[i]) && (i > 0)) \
291 #define MACRO_update__phi_inv_k__kg_plain(what_kind, which_phi) \
293 for (t = i; t < ths->d; t++) \
295 MACRO__c_phi_inv_k__ ## what_kind(which_phi); \
296 kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \
300 #define MACRO__c_phi_inv_k__A(which_phi) \
304 c_phi_inv_k[t+1] = c_phi_inv_k[t] * MACRO_ ## which_phi; \
308 c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
312 #define MACRO__c_phi_inv_k__T(which_phi) \
314 c_phi_inv_k[t+1] = c_phi_inv_k[t] * MACRO_ ## which_phi; \
317 #define MACRO_nfct_D(which_one) \
318 static inline void D_ ## which_one (X(plan) *ths) \
323 R c_phi_inv_k[ths->d+1]; \
324 int kg_plain[ths->d+1]; \
327 g_hat = ths->g_hat; \
328 f_hat = ths->f_hat; \
330 MACRO_nfct_D_init_result_ ## which_one \
332 c_phi_inv_k[0] = K(1.0); \
337 if (ths->nfct_flags & PRE_PHI_HUT) \
338 for (k_L = 0; k_L < ths->N_total; k_L++) \
340 MACRO_update__phi_inv_k__kg_plain(which_one, with_PRE_PHI_HUT); \
341 MACRO_nfct_D_compute_ ## which_one; \
345 for (k_L = 0; k_L < ths->N_total; k_L++) \
347 MACRO_update__phi_inv_k__kg_plain(which_one,compute_PHI_HUT_INV); \
348 MACRO_nfct_D_compute_ ## which_one; \
359 #define MACRO_nfct_B_PRE_FULL_PSI_compute_A \
361 (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \
364 #define MACRO_nfct_B_PRE_FULL_PSI_compute_T \
366 g[ths->psi_index_g[ix]] += ths->psi[ix] * (*fj); \
369 #define MACRO_nfct_B_compute_A \
371 (*fj) += phi_tilde[ths->d] * g[lg_plain[ths->d]]; \
374 #define MACRO_nfct_B_compute_T \
376 g[lg_plain[ths->d]] += phi_tilde[ths->d] * (*fj); \
379 #define MACRO_compute_lg_offset__count_lg(i0) \
384 (lb[(i0)] % fftw_2N(ths->n[(i0)])) + fftw_2N(ths->n[(i0)]); \
386 lg_offset[(i0)] = lb[(i0)] % (fftw_2N(ths->n[(i0)])); \
387 if (lg_offset[(i0)] >= ths->n[(i0)]) \
388 lg_offset[(i0)] = -(fftw_2N(ths->n[(i0)]) - lg_offset[(i0)]); \
391 #define MACRO_set__lg__to__lg_offset \
393 if (lg_offset[i] <= 0) \
395 lg[i] = -lg_offset[i]; \
400 lg[i] = +lg_offset[i]; \
405 #define MACRO_count__lg(dim) \
408 if ((lg[(dim)] == 0) || (lg[(dim)] == ths->n[(dim)]-1)) \
409 count_lg[(dim)] *= -1; \
411 lg[(dim)] += count_lg[(dim)]; \
414 #define MACRO_init_lb_lg_lc \
416 for (i = 0; i < ths->d; i++) \
418 MACRO_nfct__lower_boundary(j, i); \
419 MACRO_compute_lg_offset__count_lg(i); \
420 MACRO_set__lg__to__lg_offset; \
427 #define MACRO_count__lg_lc \
429 MACRO_count__lg(ths->d-1); \
432 while ((lc[i] == NFCT_SUMMANDS) && (i > 0)) \
437 MACRO_count__lg(i - 1); \
439 MACRO_set__lg__to__lg_offset; \
444 #define MACRO_update_phi_tilde_lg_plain(which_one, which_psi) \
446 for (t = i; t < ths->d; t++) \
448 MACRO__phi_tilde__ ## which_one(which_psi); \
449 lg_plain[t+1] = lg_plain[t] * ths->n[t] + lg[t]; \
453 #define MACRO__phi_tilde__A(which_psi) \
455 phi_tilde[t+1] = phi_tilde[t] * MACRO_ ## which_psi; \
458 #define MACRO__phi_tilde__T(which_psi) \
460 if(lg[t] == 0 || lg[t] == ths->n[t] - 1) \
462 phi_tilde[t+1] = phi_tilde[t] * MACRO_ ## which_psi; \
466 phi_tilde[t+1] = K(0.5) * phi_tilde[t] * MACRO_ ## which_psi; \
470 #define MACRO_nfct_B(which_one) \
471 static inline void B_ ## which_one (nfct_plan *ths) \
475 int lprod, l_L, ix; \
478 int lg_offset[ths->d]; \
479 int count_lg[ths->d]; \
480 int lg_plain[ths->d+1]; \
482 R phi_tilde[ths->d+1]; \
485 f = ths->f; g = ths->g; \
487 MACRO_nfct_B_init_result_ ## which_one \
490 if ((ths->nfct_flags & PRE_PSI)&&(ths->nfct_flags & PRE_FULL_PSI)) \
492 for (ix = 0, j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
493 for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \
495 MACRO_nfct_B_PRE_FULL_PSI_compute_ ## which_one; \
500 phi_tilde[0] = K(1.0); \
503 for (t = 0, lprod = 1; t < ths->d; t++) \
504 lprod *= NFCT_SUMMANDS; \
507 if (ths->nfct_flags & PRE_PSI) \
508 for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
510 MACRO_init_lb_lg_lc; \
511 for (l_L = 0; l_L < lprod; l_L++) \
513 MACRO_update_phi_tilde_lg_plain(which_one, with_PRE_PSI); \
514 MACRO_nfct_B_compute_ ## which_one; \
515 MACRO_count__lg_lc; \
521 for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
523 MACRO_init_lb_lg_lc; \
524 for (l_L = 0; l_L < lprod; l_L++) \
526 MACRO_update_phi_tilde_lg_plain(which_one,compute_PSI); \
527 MACRO_nfct_B_compute_ ## which_one; \
528 MACRO_count__lg_lc; \
538 #define MACRO_nfct_full_psi(which_one) \
539 static inline void full_psi__ ## which_one(nfct_plan *ths) \
545 int lg_plain[ths->d+1]; \
546 int count_lg[ths->d]; \
547 int lg_offset[ths->d]; \
552 R phi_tilde[ths->d+1]; \
553 R eps = ths->nfct_full_psi_eps; \
555 int *index_g, *index_f; \
557 int ix, ix_old, size_psi; \
559 phi_tilde[0] = K(1.0); \
562 if (ths->nfct_flags & PRE_PSI) \
564 size_psi = ths->M_total; \
565 index_f = (int*)Y(malloc)(ths->M_total * sizeof(int)); \
566 index_g = (int*)Y(malloc)(size_psi * sizeof(int)); \
567 new_psi = (R*)Y(malloc)(size_psi * sizeof(R)); \
569 for (t = 0,lprod = 1; t < ths->d; t++) \
571 lprod *= NFCT_SUMMANDS; \
572 eps *= nfct_phi(ths, K(0.0), t); \
575 for (ix = 0, ix_old = 0, j = 0; j < ths->M_total; j++) \
577 MACRO_init_lb_lg_lc; \
579 for (l_L = 0; l_L < lprod; l_L++) \
581 MACRO_update_phi_tilde_lg_plain(which_one, with_PRE_PSI); \
583 if (phi_tilde[ths->d] > eps) \
585 index_g[ix] = lg_plain[ths->d]; \
586 new_psi[ix] = phi_tilde[ths->d]; \
589 if (ix == size_psi) \
591 size_psi += ths->M_total; \
592 index_g = (int*)realloc(index_g, size_psi * sizeof(int)); \
593 new_psi = (R*)realloc(new_psi, size_psi * sizeof(R)); \
597 MACRO_count__lg_lc; \
601 index_f[j] = ix - ix_old; \
608 ths->size_psi = size_psi; \
610 index_g = (int*)realloc(index_g, size_psi * sizeof(int)); \
611 new_psi = (R*)realloc(new_psi, size_psi * sizeof(R)); \
613 ths->psi = new_psi; \
614 ths->psi_index_g = index_g; \
615 ths->psi_index_f = index_f; \
620 MACRO_nfct_full_psi(A)
621 MACRO_nfct_full_psi(T)
625 void X(trafo)(X(plan) *ths)
628 ths->g_hat = ths->g1;
641 Z(execute)(ths->my_fftw_r2r_plan);
644 if (ths->nfct_flags & PRE_FULL_PSI)
653 if (ths->nfct_flags & PRE_FULL_PSI)
655 Y(free)(ths->psi_index_g);
656 Y(free)(ths->psi_index_f);
660 void X(adjoint)(X(plan) *ths)
663 ths->g_hat = ths->g2;
666 if (ths->nfct_flags & PRE_FULL_PSI)
675 if (ths->nfct_flags & PRE_FULL_PSI)
677 Y(free)(ths->psi_index_g);
678 Y(free)(ths->psi_index_f);
685 Z(execute)(ths->my_fftw_r2r_plan);
696 static inline
void precompute_phi_hut(X(plan) *ths)
701 ths->c_phi_inv = (R**)Y(malloc)(ths->d *
sizeof(R*));
703 for (t = 0; t < ths->d; t++)
705 ths->c_phi_inv[t] = (R*)Y(malloc)(ths->N[t] *
sizeof(R));
707 for (kg[t] = 0; kg[t] < ths->N[t]; kg[t]++)
709 ths->c_phi_inv[t][kg[t]] = MACRO_compute_PHI_HUT_INV;
714 void X(precompute_psi)(X(plan) *ths)
721 for (t = 0; t < ths->d; t++)
723 for (j = 0; j < ths->M_total; j++)
725 MACRO_nfct__lower_boundary(j, t);
726 for(lc[t] = 0; lc[t] < NFCT_SUMMANDS; lc[t]++)
727 ths->psi[(j * ths->d + t) * NFCT_SUMMANDS + lc[t]] = MACRO_compute_PSI;
732 static inline void init_help(X(plan) *ths)
736 ths->N_total = prod_int(ths->N, ths->d);
737 ths->sigma = (R*)Y(malloc)(ths->d *
sizeof(R));
739 for (t = 0; t < ths->d; t++)
740 ths->sigma[t] = ((R)(ths->n[t] - 1)) / ths->N[t];
743 ths->r2r_kind = (Z(r2r_kind)*)Y(malloc)(ths->d *
sizeof (Z(r2r_kind)));
744 for (t = 0; t < ths->d; t++)
745 ths->r2r_kind[t] = FFTW_REDFT00;
747 NFCT_WINDOW_HELP_INIT;
749 if (ths->nfct_flags & MALLOC_X)
750 ths->x = (R*)Y(malloc)(ths->d * ths->M_total *
sizeof(R));
752 if (ths->nfct_flags & MALLOC_F_HAT)
753 ths->f_hat = (R*)Y(malloc)(ths->N_total *
sizeof(R));
755 if (ths->nfct_flags & MALLOC_F)
756 ths->f = (R*)Y(malloc)(ths->M_total *
sizeof(R));
758 if (ths->nfct_flags & PRE_PHI_HUT)
759 precompute_phi_hut(ths);
762 if (ths->nfct_flags & PRE_PSI)
765 (R*)Y(malloc)(ths->M_total * ths->d * NFCT_SUMMANDS *
sizeof(R));
768 ths->nfct_full_psi_eps = POW(K(10.0), K(-10.0));
771 if (ths->nfct_flags & FFTW_INIT)
774 (R*)Y(malloc)(prod_int(ths->n, ths->d) *
sizeof(R));
776 if (ths->nfct_flags & FFT_OUT_OF_PLACE)
778 (R*) Y(malloc)(prod_int(ths->n, ths->d) *
sizeof(R));
782 ths->my_fftw_r2r_plan =
783 Z(plan_r2r)(ths->d, ths->n, ths->g1, ths->g2, ths->r2r_kind,
787 ths->mv_trafo = (void (*) (
void* ))X(trafo);
788 ths->mv_adjoint = (void (*) (
void* ))X(adjoint);
791 void X(init)(X(plan) *ths,
int d,
int *N,
int M_total)
796 ths->M_total = M_total;
797 ths->N = (
int*) Y(malloc)(ths->d *
sizeof(int));
799 for (t = 0;t < d; t++)
802 ths->n = (
int*) Y(malloc)(ths->d *
sizeof(int));
804 for (t = 0; t < d; t++)
805 ths->n[t] = fftw_2N(Y(next_power_of_2)(ths->N[t]));
811 ths->nfct_flags = NFCT_DEFAULT_FLAGS;
812 ths->fftw_flags = FFTW_DEFAULT_FLAGS;
830 void X(init_guru)(X(plan) *ths,
int d,
int *N,
int M_total,
int *n,
int m,
831 unsigned nfct_flags,
unsigned fftw_flags)
836 ths->M_total = M_total;
838 ths->N = (
int*)Y(malloc)(ths->d *
sizeof(int));
840 for (t = 0; t < d; t++)
843 ths->n = (
int*)Y(malloc)(ths->d *
sizeof(int));
845 for (t = 0; t < d; t++)
850 ths->nfct_flags = nfct_flags;
851 ths->fftw_flags = fftw_flags;
856 void X(init_1d)(X(plan) *ths,
int N0,
int M_total)
861 X(init)(ths, 1, N, M_total);
864 void X(init_2d)(X(plan) *ths,
int N0,
int N1,
int M_total)
870 X(init)(ths, 2, N, M_total);
873 void X(init_3d)(X(plan) *ths,
int N0,
int N1,
int N2,
int M_total)
880 X(init)(ths, 3, N, M_total);
883 void X(finalize)(X(plan) *ths)
887 if (ths->nfct_flags & FFTW_INIT)
889 Z(destroy_plan)(ths->my_fftw_r2r_plan);
891 if (ths->nfct_flags & FFT_OUT_OF_PLACE)
898 if (ths->nfct_flags & PRE_PSI)
903 if (ths->nfct_flags & PRE_PHI_HUT)
905 for (t = 0; t < ths->d; t++)
906 Y(free)(ths->c_phi_inv[t]);
907 Y(free)(ths->c_phi_inv);
910 if (ths->nfct_flags & MALLOC_F)
913 if(ths->nfct_flags & MALLOC_F_HAT)
916 if (ths->nfct_flags & MALLOC_X)
919 WINDOW_HELP_FINALIZE;
925 Y(free)(ths->r2r_kind);