33 #include "cstripack.h"
46 double nfft_elapsed_seconds(ticks t1, ticks t0)
50 return elapsed(t1,t0) / TICKS_PER_SECOND;
86 for (t = 1; t < d; t++)
87 sum = sum * N[t] + idx[t];
106 static void bspline_help(
int k,
double x,
double *scratch,
int j,
int ug,
114 for (i = og + r - k + 1, idx = og; idx >= ug; i--, idx--)
116 a = ((R)(x - i)) / ((R)(k - j));
117 scratch[idx] = (1 - a) * scratch[idx-1] + a * scratch[idx];
138 r=(int)(ceil(x)-1.0);
140 for(idx=0; idx<k; idx++)
152 for(j=1, og=g2+1; j<=g1; j++, og++)
154 a = (x - r + k - 1 - og)/(k - j);
155 scratch[og] = (1 - a) * scratch[og-1];
156 bspline_help(k,x,scratch,j,ug+1,og-1,r);
157 a = (x - r + k - 1 - ug)/(k - j);
158 scratch[ug] = a * scratch[ug];
160 for(og-- ; j<=g2; j++)
162 bspline_help(k,x,scratch,j,ug+1,og,r);
163 a = (x - r + k - 1 - ug)/(k - j);
164 scratch[ug] = a * scratch[ug];
169 bspline_help(k,x,scratch,j,ug,og,r);
171 result_value = scratch[k-1];
173 return(result_value);
183 for(k=0,dot=0; k<n; k++)
184 dot+=conj(x[k])*x[k];
196 for(k=0,dot=0; k<n; k++)
210 for(k=0,dot=0.0; k<n; k++)
211 dot+=w[k]*conj(x[k])*x[k];
223 for(k=0,dot=0.0; k<n; k++)
238 for(k=0,dot=0.0; k<n; k++)
239 dot+=w[k]*w2[k]*w2[k]*conj(x[k])*x[k];
252 for(k=0,dot=0.0; k<n; k++)
253 dot+=w2[k]*w2[k]*conj(x[k])*x[k];
414 x[k]=a*x[k]+w[k]*y[k];
424 x[k]=a*x[k]+w[k]*y[k];
430 int d_pre, d_act, d_post;
431 int N_pre, N_act, N_post;
432 int k_pre, k_act, k_post;
435 double _Complex x_swap;
437 for(d_act=0;d_act<d;d_act++)
439 for(d_pre=0, N_pre=1;d_pre<d_act;d_pre++)
444 for(d_post=d_act+1, N_post=1;d_post<d;d_post++)
447 for(k_pre=0;k_pre<N_pre;k_pre++)
448 for(k_act=0;k_act<N_act/2;k_act++)
449 for(k_post=0;k_post<N_post;k_post++)
451 k=(k_pre*N_act+k_act)*N_post+k_post;
452 k_swap=(k_pre*N_act+k_act+N_act/2)*N_post+k_post;
469 printf (
"\n %s, adr=%p\n", text, (
void*)x);
483 printf(
"%d,\n", x[k]);
488 void X(vpr_double)(R *x,
const int n,
const char *text)
494 printf(
"null pointer\n");
501 printf (
"\n %s, adr=%p\n", text, (
void*)x);
503 for (k = 0; k < n; k++)
508 printf(
"%+.1" FE
",", x[k]);
518 for (k = 0; k < n; k++)
519 printf(
"%+" FE
",\n", x[k]);
525 void X(vpr_complex)(C *x,
const int n,
const char *text)
531 printf(
"\n %s, adr=%p\n", text, (
void*)x);
532 for (k = 0; k < n; k++)
537 printf(
"%+.1" FE
"%+.1" FE
"i,", CREAL(x[k]), CIMAG(x[k]));
546 for (k = 0; k < n; k++)
547 printf(
"%+" FE
"%+" FE
"i,\n", CREAL(x[k]), CIMAG(x[k]));
552 void X(vrand_unit_complex)(C *x,
const int n)
556 for (k = 0; k < n; k++)
557 x[k] = nfft_drand48() + II*nfft_drand48();
560 void X(vrand_shifted_unit_double)(R *x,
const int n)
564 for (k = 0; k < n; k++)
565 x[k] = nfft_drand48() - K(0.5);
569 void X(voronoi_weights_1d)(R *w, R *x,
const int M)
573 w[0] = (x[1]-x[0])/K(2.0);
575 for(j = 1; j < M-1; j++)
576 w[j] = (x[j+1]-x[j-1])/K(2.0);
578 w[M-1] = (x[M-1]-x[M-2])/K(2.0);
624 xc = (
double*)
nfft_malloc((2*M-4+1)*
sizeof(double));
625 yc = (
double*)
nfft_malloc((2*M-4+1)*
sizeof(double));
626 zc = (
double*)
nfft_malloc((2*M-4+1)*
sizeof(double));
627 rc = (
double*)
nfft_malloc((2*M-4+1)*
sizeof(double));
628 vr = (
double*)
nfft_malloc(3*(2*M-4+1)*
sizeof(double));
632 for (k = 0; k < M; k++)
634 x[k] = sin(2.0*
PI*xi[2*k+1])*cos(2.0*
PI*xi[2*k]);
635 y[k] = sin(2.0*
PI*xi[2*k+1])*sin(2.0*
PI*xi[2*k]);
636 z[k] = cos(2.0*
PI*xi[2*k+1]);
640 trmesh_(&Mlocal, x, y, z, list, lptr, lend, &lnew, near, next, dist, &ier);
646 crlist_(&Mlocal, &Mlocal, x, y, z, list, lend, lptr, &lnew, ltri, listc, &nb, xc,
652 for (k = 0; k < M; k++)
670 vr[3*j+1] = yc[kv-1];
671 vr[3*j+2] = zc[kv-1];
676 for (el = 0; el < j; el++)
678 a += areas_(vr, &vr[3*(el+1)],&vr[3*(((el+1)%j)+1)]);
710 R X(modified_fejer)(
const int N,
const int kk)
712 return (K(2.0)/((R)(N*N))*(K(1.0)-FABS(K(2.0)*kk+K(1.0))/((R)N)));
716 R X(modified_jackson2)(
const int N,
const int kk)
719 const R n=(N/K(2.0)+K(1.0))/K(2.0);
722 for (result = K(0.0), kj = kk; kj <= kk+1; kj++)
727 result += K(1.0) - (K(3.0)*k + K(6.0)*n*POW(k,K(2.0))
728 - K(3.0)*POW(k,K(3.0)))/(K(2.0)*n*(K(2.0)*POW(n,K(2.0))+K(1.0)));
730 result+= (K(2.0)*n-k)*(POW(2*n-k,K(2.0))-K(1.0))/(K(2.0)
731 *n*(K(2.0)*POW(n,K(2.0))+K(1.0)));
738 R X(modified_jackson4)(
const int N,
const int kk)
741 const R n = (N/K(2.0)+K(3.0))/K(4.0), normalisation = (K(2416.0)*POW(n,K(7.0))
742 + K(1120.0)*POW(n,K(5.0)) + K(784.0)*POW(n,K(3.0)) + K(720.0)*n);
745 for (result = K(0.0), kj = kk; kj <= kk + 1; kj++)
750 result += K(1.0) - (K(1260.0)*k + (K(1680.0)*POW(n, K(5.0))
751 + K(2240.0)*POW(n, K(3.0)) + K(2940.0)*n)*POW(k, K(2.0))
752 - K(1715.0)*POW(k, K(3.0)) - (K(560.0)*POW(n, K(3.0))
753 + K(1400.0)*n)*POW(k, K(4.0)) + K(490.0)*POW(k, K(5.0))
754 + K(140.0)*n*POW(k, K(6.0)) - K(35.0)*POW(k,K(7.0)))/normalisation;
756 if ((K(1.0) <= k/n) && (k/n < K(2.0)))
757 result += ((K(2472.0)*POW(n, K(7.0)) + K(336.0)*POW(n, K(5.0))
758 + K(3528.0)*POW(n, K(3.0)) - K(1296.0)*n) - (K(392.0)*POW(n, K(6.0))
759 - K(3920.0)*POW(n, K(4.0)) + K(8232.0)*POW(n, K(2.0)) - K(756.0))*k
760 - (K(504.0)*POW(n, K(5.0)) + K(10080.0)*POW(n, K(3.0))
761 - K(5292.0)*n)*POW(k, K(2.0)) - (K(1960.0)*POW(n, K(4.0))
762 - K(7840.0)*POW(n, K(2.0)) + K(1029.0))*POW(k, K(3.0))
763 + (K(2520.0)*POW(n, K(3.0)) - K(2520.0)*n) * POW(k, K(4.0))
764 - (K(1176.0)*POW(n, K(2.0)) - K(294.0)) * POW(k, K(5.0))
765 + K(252.0)*n*POW(k, K(6.0)) - K(21.0)*POW(k, K(7.0)))/normalisation;
767 if ((K(2.0) <= k/n) && (k/n < K(3.0)))
768 result += (-(K(1112.0)*POW(n, K(7.0)) - K(12880.0)*POW(n, K(5.0))
769 + K(7448.0)*POW(n, K(3.0)) - K(720.0)*n) + (K(12152.0)*POW(n, K(6.0))
770 - K(27440.0)*POW(n, K(4.0)) + K(8232.0)*POW(n, K(2.0)) - K(252.0))*k
771 - (K(19320.0)*POW(n, K(5.0)) - K(21280.0)*POW(n, K(3.0))
772 + K(2940.0)*n)*POW(k, K(2.0)) + (K(13720.0)*POW(n, K(4.0))
773 - K(7840.0)*POW(n, K(2.0)) + K(343.0))*POW(k, K(3.0))
774 - (K(5320.0)*POW(n, K(3.0)) - K(1400.0)*n)*POW(k, K(4.0))
775 + (K(1176.0)*POW(n, K(2.0)) - K(98.0))*POW(k, K(5.0))
776 - K(140.0)*n*POW(k, K(6.0)) + K(7.0) * POW(k, K(7.0)))/normalisation;
778 if ((K(3.0) <= k/n) && (k/n < K(4.0)))
779 result += ((4*n-k)*(POW(4*n-k, K(2.0)) - K(1.0))*(POW(4*n-k, K(2.0))
780 - K(4.0))*(POW(4*n-k, K(2.0)) - K(9.0)))/normalisation;
787 R X(modified_sobolev)(
const R mu,
const int kk)
792 for (result = K(0.0), kj = kk; kj <= kk+1; kj++)
798 result += POW((
double)k,-K(2.0)*mu);
805 R X(modified_multiquadric)(
const R mu,
const R c,
const int kk)
810 for (result = K(0.0), kj = kk; kj <= kk+1; kj++)
813 result += POW((
double)(k*k + c*c), -mu);
819 static inline int scaled_modified_bessel_i_series(
const R x,
const R
alpha,
820 const int nb,
const int ize, R *b)
822 const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
823 R tempa = K(1.0), empal = K(1.0) +
alpha, halfx = K(0.0), tempb = K(0.0);
830 tempa = POW(halfx, alpha)/TGAMMA(empal);
835 if (K(1.0) < x + K(1.0))
838 b[0] = tempa + tempa*tempb/empal;
840 if (x != K(0.0) && b[0] == K(0.0))
848 R tempc = halfx, tover = (enmten + enmten)/x;
851 tover = enmten/tempb;
853 for (n = 1; n < nb; n++)
859 if (tempa <= tover*empal)
862 b[n] = tempa + tempa*tempb/empal;
864 if (b[n] == K(0.0) && n < ncalc)
869 for (n = 1; n < nb; n++)
875 static inline void scaled_modified_bessel_i_normalize(
const R x,
876 const R alpha,
const int nb,
const int ize, R *b,
const R sum_)
878 const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
884 sum = sum * TGAMMA(K(1.0) + alpha) * POW(x/K(2.0), -alpha);
894 for (n = 1; n <= nb; n++)
950 int nfft_smbi(
const R x,
const R alpha,
const int nb,
const int ize, R *b)
972 const int nsig = MANT_DIG + 2;
973 const R enten = nfft_float_property(NFFT_R_MAX);
974 const R ensig = POW(K(10.0),(R)nsig);
975 const R rtnsig = POW(K(10.0),-CEIL((R)nsig/K(4.0)));
976 const R xlarge = K(1E4);
977 const R exparg = FLOOR(LOG(POW(K(R_RADIX),K(DBL_MAX_EXP-1))));
980 int l, n, nend, magx, nbmx, ncalc, nstart;
981 R p, em, en, sum, pold, test, empal, tempa, tempb, tempc, psave, plast, tover,
984 magx = LRINT(FLOOR(x));
987 if ( nb <= 0 || x < K(0.0) || alpha < K(0.0) || K(1.0) <= alpha
988 || ((ize != 1 || exparg < x) && (ize != 2 || xlarge < x)))
989 return (MIN(nb,0) - 1);
993 return scaled_modified_bessel_i_series(x,alpha,nb,ize,b);
1001 en = (R) (n+n) + (alpha+
alpha);
1006 test = ensig + ensig;
1008 if ((5*nsig) < (magx << 1))
1009 test = SQRT(test*p);
1011 test /= POW(K(1.585),(R)magx);
1016 tover = enten/ensig;
1020 for (n = nstart; n <= nend; n++)
1025 p = en*plast/x + pold;
1043 p = en*plast/x + pold;
1044 }
while (p <= K(1.0));
1049 test = pold*plast*(K(0.5) - K(0.5)/(tempb * tempb))/ensig;
1055 for (ncalc = nstart; ncalc <= nend; ncalc++)
1059 psave = en*psavel/x + pold;
1060 if (test < psave * psavel)
1070 en = (R) (n+n) + (alpha+
alpha);
1073 test = FMAX(test,SQRT(plast*ensig)*SQRT(p+p));
1083 p = en*plast/x + pold;
1094 emp2al = em - K(1.0) + (alpha+
alpha);
1095 sum = tempa*empal*emp2al/em;
1103 for (l = 1; l <= nend; ++l)
1104 b[n-1 + l] = K(0.0);
1111 for (l = 1; l <= nend; ++l)
1117 tempa = en*tempb/x + tempc;
1128 sum = (sum + tempa*empal)*emp2al/em;
1137 sum = sum + sum + tempa;
1138 scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
1145 b[n-1] = en*tempa/x + tempb;
1149 sum = sum + sum + b[0];
1150 scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
1161 sum = (sum + b[n-1]*empal)*emp2al/em;
1169 for (l = 1; l <= nend; ++l)
1173 b[n-1] = en*b[n]/x + b[n+1];
1181 sum = (sum + b[n-1]*empal)*emp2al/em;
1186 b[0] = K(2.0)*empal*b[1]/x + b[2];
1187 sum = sum + sum + b[0];
1189 scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
1197 void nfft_assertion_failed(
const char *s,
int line,
const char *file)
1200 fprintf(stderr,
"nfft: %s:%d: assertion failed: %s\n", file, line, s);
1213 #if HAVE_DECL_DRAND48 == 0
1214 extern double drand48(
void);
1216 #if HAVE_DECL_SRAND48 == 0
1217 extern void srand48(
long int);
1220 double nfft_drand48(
void)
1225 return ((R)rand())/((R)RAND_MAX);
1229 void nfft_srand48(
long int seed)
1234 srand((
unsigned int)seed);
1239 #define z_swap(_a_, _b_, _t_) do { (_t_) = (_a_); (_a_) = (_b_); (_b_) = (_t_); } while (0)
1246 static void nfft_sort_node_indices_sort_bubble(
int n,
int *keys)
1250 for (i = 0; i < n; ++i)
1253 while (j > 0 && keys[2 * j + 0] < keys[2 * (j - 1) + 0])
1255 z_swap(keys[2 * j + 0], keys[2 * (j - 1) + 0], ti);
1256 z_swap(keys[2 * j + 1], keys[2 * (j - 1) + 1], ti);
1267 static void nfft_sort_node_indices_radix_count(
int n,
int *keys,
int shift,
int mask,
int *counts)
1271 for (i = 0; i < n; ++i)
1273 k = (keys[2 * i + 0] >> shift) & mask;
1283 static void nfft_sort_node_indices_radix_rearrange(
int n,
int *keys_in,
int *keys_out,
int shift,
int mask,
int *displs)
1287 for (i = 0; i < n; ++i)
1289 k = (keys_in[2 * i + 0] >> shift) & mask;
1290 keys_out[2 * displs[k] + 0] = keys_in[2 * i + 0];
1291 keys_out[2 * displs[k] + 1] = keys_in[2 * i + 1];
1303 const int rwidth = 9;
1304 const int radix_n = 1 << rwidth;
1305 const int radix_mask = radix_n - 1;
1306 const int rhigh_in = rhigh;
1310 omp_get_max_threads();
1315 int *from, *to, *tmp;
1318 int lcounts[tmax * radix_n];
1320 int tid = 0, tnum = 1;
1329 #pragma omp parallel private(tid, tnum, i, l, h)
1331 tid = omp_get_thread_num();
1332 tnum = omp_get_num_threads();
1335 for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
1337 l = (tid * n) / tnum;
1338 h = ((tid + 1) * n) / tnum;
1340 nfft_sort_node_indices_radix_count(h - l, from + (2 * l), rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
1346 for (i = 0; i < radix_n; ++i)
1348 for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
1352 #pragma omp parallel private(tid, tnum, i, l, h)
1354 tid = omp_get_thread_num();
1355 tnum = omp_get_num_threads();
1358 l = (tid * n) / tnum;
1359 h = ((tid + 1) * n) / tnum;
1361 nfft_sort_node_indices_radix_rearrange(h - l, from + (2 * l), to, rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
1375 if (to == keys0) memcpy(to, from, n * 2 *
sizeof(
int));
1385 const int rwidth = 9;
1386 const int radix_n = 1 << rwidth;
1387 const int radix_mask = radix_n - 1;
1391 omp_get_max_threads();
1397 int lcounts[tmax * radix_n];
1399 int counts[radix_n], displs[radix_n];
1401 int tid = 0, tnum = 1;
1407 #pragma omp parallel private(tid, tnum, i, l, h)
1409 tid = omp_get_thread_num();
1410 tnum = omp_get_num_threads();
1413 for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
1415 l = (tid * n) / tnum;
1416 h = ((tid + 1) * n) / tnum;
1418 nfft_sort_node_indices_radix_count(h - l, keys0 + (2 * l), rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
1424 for (i = 0; i < radix_n; ++i)
1426 for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
1428 displs[i] = lcounts[0 * radix_n + i];
1429 if (i > 0) counts[i - 1] = displs[i] - displs[i - 1];
1431 counts[radix_n - 1] = n - displs[radix_n - 1];
1434 #pragma omp parallel private(tid, tnum, i, l, h)
1436 tid = omp_get_thread_num();
1437 tnum = omp_get_num_threads();
1440 l = (tid * n) / tnum;
1441 h = ((tid + 1) * n) / tnum;
1443 nfft_sort_node_indices_radix_rearrange(h - l, keys0 + (2 * l), keys1, rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
1448 memcpy(keys0, keys1, n * 2 *
sizeof(
int));
1452 for (i = 0; i < radix_n; ++i)
1456 if (counts[i] > 256)
1459 nfft_sort_node_indices_sort_bubble(counts[i], keys0 + 2 * displs[i]);
1465 int nfft_get_num_threads(
void)
1468 return nfft_get_omp_num_threads();
1475 int nfft_get_omp_num_threads(
void)
1478 #pragma omp parallel default(shared)
1480 int n = omp_get_num_threads();