NFFT Logo 3.2.3
fastsum.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id: fastsum.c 3896 2012-10-10 12:19:26Z tovo $ */
20 
27 #include "config.h"
28 
29 #include <stdlib.h>
30 #include <math.h>
31 #ifdef HAVE_COMPLEX_H
32 #include <complex.h>
33 #endif
34 
35 #include "nfft3util.h"
36 #include "nfft3.h"
37 #include "fastsum.h"
38 #include "infft.h"
39 
41 #include "kernels.h"
42 
49 int max_i(int a, int b)
50 {
51  return a >= b ? a : b;
52 }
53 
55 double fak(int n)
56 {
57  if (n<=1) return 1.0;
58  else return (double)n*fak(n-1);
59 }
60 
62 double binom(int n, int m)
63 {
64  return fak(n)/fak(m)/fak(n-m);
65 }
66 
68 double BasisPoly(int m, int r, double xx)
69 {
70  int k;
71  double sum=0.0;
72 
73  for (k=0; k<=m-r; k++) {
74  sum+=binom(m+k,k)*pow((xx+1.0)/2.0,(double)k);
75  }
76  return sum*pow((xx+1.0),(double)r)*pow(1.0-xx,(double)(m+1))/(1<<(m+1))/fak(r); /* 1<<(m+1) = 2^(m+1) */
77 }
78 
80 double _Complex regkern(kernel k, double xx, int p, const double *param, double a, double b)
81 {
82  int r;
83  double _Complex sum=0.0;
84 
85  if (xx<-0.5)
86  xx=-0.5;
87  if (xx>0.5)
88  xx=0.5;
89  if ((xx>=-0.5+b && xx<=-a) || (xx>=a && xx<=0.5-b)) {
90  return k(xx,0,param);
91  }
92  else if (xx<-0.5+b) {
93  sum=(k(-0.5,0,param)+k(0.5,0,param))/2.0
94  *BasisPoly(p-1,0,2.0*xx/b+(1.0-b)/b);
95  for (r=0; r<p; r++) {
96  sum+=pow(-b/2.0,(double)r)
97  *k(-0.5+b,r,param)
98  *BasisPoly(p-1,r,-2.0*xx/b+(b-1.0)/b);
99  }
100  return sum;
101  }
102  else if ((xx>-a) && (xx<a)) {
103  for (r=0; r<p; r++) {
104  sum+=pow(a,(double)r)
105  *( k(-a,r,param)*BasisPoly(p-1,r,xx/a)
106  +k( a,r,param)*BasisPoly(p-1,r,-xx/a)*(r & 1 ? -1 : 1));
107  }
108  return sum;
109  }
110  else if (xx>0.5-b) {
111  sum=(k(-0.5,0,param)+k(0.5,0,param))/2.0
112  *BasisPoly(p-1,0,-2.0*xx/b+(1.0-b)/b);
113  for (r=0; r<p; r++) {
114  sum+=pow(b/2.0,(double)r)
115  *k(0.5-b,r,param)
116  *BasisPoly(p-1,r,2.0*xx/b-(1.0-b)/b);
117  }
118  return sum;
119  }
120  return k(xx,0,param);
121 }
122 
126 double _Complex regkern1(kernel k, double xx, int p, const double *param, double a, double b)
127 {
128  int r;
129  double _Complex sum=0.0;
130 
131  if (xx<-0.5)
132  xx=-0.5;
133  if (xx>0.5)
134  xx=0.5;
135  if ((xx>=-0.5+b && xx<=-a) || (xx>=a && xx<=0.5-b))
136  {
137  return k(xx,0,param);
138  }
139  else if ((xx>-a) && (xx<a))
140  {
141  for (r=0; r<p; r++) {
142  sum+=pow(a,(double)r)
143  *( k(-a,r,param)*BasisPoly(p-1,r,xx/a)
144  +k( a,r,param)*BasisPoly(p-1,r,-xx/a)*(r & 1 ? -1 : 1));
145  }
146  return sum;
147  }
148  else if (xx<-0.5+b)
149  {
150  for (r=0; r<p; r++) {
151  sum+=pow(b,(double)r)
152  *( k(0.5-b,r,param)*BasisPoly(p-1,r,(xx+0.5)/b)
153  +k(-0.5+b,r,param)*BasisPoly(p-1,r,-(xx+0.5)/b)*(r & 1 ? -1 : 1));
154  }
155  return sum;
156  }
157  else if (xx>0.5-b)
158  {
159  for (r=0; r<p; r++) {
160  sum+=pow(b,(double)r)
161  *( k(0.5-b,r,param)*BasisPoly(p-1,r,(xx-0.5)/b)
162  +k(-0.5+b,r,param)*BasisPoly(p-1,r,-(xx-0.5)/b)*(r & 1 ? -1 : 1));
163  }
164  return sum;
165  }
166  return k(xx,0,param);
167 }
168 
170 double _Complex regkern2(kernel k, double xx, int p, const double *param, double a, double b)
171 {
172  int r;
173  double _Complex sum=0.0;
174 
175  xx=fabs(xx);
176 
177  if (xx>0.5) {
178  for (r=0; r<p; r++) {
179  sum+=pow(b,(double)r)*k(0.5-b,r,param)
180  *(BasisPoly(p-1,r,0)+BasisPoly(p-1,r,0));
181  }
182  return sum;
183  }
184  else if ((a<=xx) && (xx<=0.5-b)) {
185  return k(xx,0,param);
186  }
187  else if (xx<a) {
188  for (r=0; r<p; r++) {
189  sum+=pow(-a,(double)r)*k(a,r,param)
190  *(BasisPoly(p-1,r,xx/a)+BasisPoly(p-1,r,-xx/a));
191  }
192  return sum;
193  }
194  else if ((0.5-b<xx) && (xx<=0.5)) {
195  for (r=0; r<p; r++) {
196  sum+=pow(b,(double)r)*k(0.5-b,r,param)
197  *(BasisPoly(p-1,r,(xx-0.5)/b)+BasisPoly(p-1,r,-(xx-0.5)/b));
198  }
199  return sum;
200  }
201  return 0.0;
202 }
203 
207 double _Complex regkern3(kernel k, double xx, int p, const double *param, double a, double b)
208 {
209  int r;
210  double _Complex sum=0.0;
211 
212  xx=fabs(xx);
213 
214  if (xx>=0.5) {
215  /*return kern(typ,c,0,0.5);*/
216  xx=0.5;
217  }
218  /* else */
219  if ((a<=xx) && (xx<=0.5-b)) {
220  return k(xx,0,param);
221  }
222  else if (xx<a) {
223  for (r=0; r<p; r++) {
224  sum+=pow(-a,(double)r)*k(a,r,param)
225  *(BasisPoly(p-1,r,xx/a)+BasisPoly(p-1,r,-xx/a));
226  }
227  /*sum=kern(typ,c,0,xx); */
228  return sum;
229  }
230  else if ((0.5-b<xx) && (xx<=0.5)) {
231  sum=k(0.5,0,param)*BasisPoly(p-1,0,-2.0*xx/b+(1.0-b)/b);
232  /* sum=regkern2(typ,c,p,a,b, 0.5)*BasisPoly(p-1,0,-2.0*xx/b+(1.0-b)/b); */
233  for (r=0; r<p; r++) {
234  sum+=pow(b/2.0,(double)r)
235  *k(0.5-b,r,param)
236  *BasisPoly(p-1,r,2.0*xx/b-(1.0-b)/b);
237  }
238  return sum;
239  }
240  return 0.0;
241 }
242 
244 double _Complex linintkern(const double x, const double _Complex *Add,
245  const int Ad, const double a)
246 {
247  double c,c1,c3;
248  int r;
249  double _Complex f1,f2;
250 
251  c=x*Ad/a;
252  r=c; r=abs(r);
253  f1=Add[r];f2=Add[r+1];
254  c=fabs(c);
255  c1=c-r;
256  c3=c1-1.0;
257  return (-f1*c3+f2*c1);
258 }
259 
260 double _Complex quadrintkern(const double x, const double _Complex *Add,
261  const int Ad, const double a)
262 {
263  double c,c1,c2,c3;
264  int r;
265  double _Complex f0,f1,f2;
266 
267  c=x*Ad/a;
268  r=c; r=abs(r);
269  if (r==0) {f0=Add[r+1];f1=Add[r];f2=Add[r+1];}
270  else { f0=Add[r-1];f1=Add[r];f2=Add[r+1];}
271  c=fabs(c);
272  c1=c-r;
273  c2=c1+1.0;
274  c3=c1-1.0;
275  return (f0*c1*c3/2.0-f1*c2*c3+f2*c2*c1/2.0);
276 }
277 
279 double _Complex kubintkern(const double x, const double _Complex *Add,
280  const int Ad, const double a)
281 {
282  double c,c1,c2,c3,c4;
283  int r;
284  double _Complex f0,f1,f2,f3;
285  c=x*Ad/a;
286  r=c; r=abs(r);
287  if (r==0) {f0=Add[r+1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
288  else { f0=Add[r-1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
289  c=fabs(c);
290  c1=c-r;
291  c2=c1+1.0;
292  c3=c1-1.0;
293  c4=c1-2.0;
294  /* return(-f0*(c-r)*(c-r-1.0)*(c-r-2.0)/6.0+f1*(c-r+1.0)*(c-r-1.0)*(c-r-2.0)/2-
295  f2*(c-r+1.0)*(c-r)*(c-r-2.0)/2+f3*(c-r+1.0)*(c-r)*(c-r-1.0)/6.0); */
296  return(-f0*c1*c3*c4/6.0+f1*c2*c3*c4/2.0-f2*c2*c1*c4/2.0+f3*c2*c1*c3/6.0);
297 }
298 
300 double _Complex kubintkern1(const double x, const double _Complex *Add,
301  const int Ad, const double a)
302 {
303  double c,c1,c2,c3,c4;
304  int r;
305  double _Complex f0,f1,f2,f3;
306  Add+=2;
307  c=(x+a)*Ad/2/a;
308  r=c; r=abs(r);
309  /*if (r==0) {f0=Add[r];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
310  else */
311  { f0=Add[r-1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
312  c=fabs(c);
313  c1=c-r;
314  c2=c1+1.0;
315  c3=c1-1.0;
316  c4=c1-2.0;
317  /* return(-f0*(c-r)*(c-r-1.0)*(c-r-2.0)/6.0+f1*(c-r+1.0)*(c-r-1.0)*(c-r-2.0)/2-
318  f2*(c-r+1.0)*(c-r)*(c-r-2.0)/2+f3*(c-r+1.0)*(c-r)*(c-r-1.0)/6.0); */
319  return(-f0*c1*c3*c4/6.0+f1*c2*c3*c4/2.0-f2*c2*c1*c4/2.0+f3*c2*c1*c3/6.0);
320 }
321 
323 void quicksort(int d, int t, double *x, double _Complex *alpha, int N)
324 {
325  int lpos=0;
326  int rpos=N-1;
327  /*double pivot=x[((N-1)/2)*d+t];*/
328  double pivot=x[(N/2)*d+t];
329 
330  int k;
331  double temp1;
332  double _Complex temp2;
333 
334  while (lpos<=rpos)
335  {
336  while (x[lpos*d+t]<pivot)
337  lpos++;
338  while (x[rpos*d+t]>pivot)
339  rpos--;
340  if (lpos<=rpos)
341  {
342  for (k=0; k<d; k++)
343  {
344  temp1=x[lpos*d+k];
345  x[lpos*d+k]=x[rpos*d+k];
346  x[rpos*d+k]=temp1;
347  }
348  temp2=alpha[lpos];
349  alpha[lpos]=alpha[rpos];
350  alpha[rpos]=temp2;
351 
352  lpos++;
353  rpos--;
354  }
355  }
356  if (0<rpos)
357  quicksort(d,t,x,alpha,rpos+1);
358  if (lpos<N-1)
359  quicksort(d,t,x+lpos*d,alpha+lpos,N-lpos);
360 }
361 
363 static void BuildBox(fastsum_plan *ths)
364 {
365  int t, l;
366  int *box_index;
367  double val[ths->d];
368 
369  box_index = (int *) malloc(ths->box_count * sizeof(int));
370  for (t=0; t < ths->box_count; t++)
371  box_index[t] = 0;
372 
373  for (l=0; l < ths->N_total; l++)
374  {
375  int ind = 0;
376  for (t=0; t < ths->d; t++)
377  {
378  val[t] = ths->x[ths->d * l + t] + 0.25 - ths->eps_B/2.0;
379  ind *= ths->box_count_per_dim;
380  ind += (int) (val[t] / ths->eps_I);
381  }
382  box_index[ind]++;
383  }
384 
385  ths->box_offset[0] = 0;
386  for (t=1; t<=ths->box_count; t++)
387  {
388  ths->box_offset[t] = ths->box_offset[t-1] + box_index[t-1];
389  box_index[t-1] = ths->box_offset[t-1];
390  }
391 
392  for (l=0; l < ths->N_total; l++)
393  {
394  int ind = 0;
395  for (t=0; t < ths->d; t++)
396  {
397  val[t] = ths->x[ths->d * l + t] + 0.25 - ths->eps_B/2.0;
398  ind *= ths->box_count_per_dim;
399  ind += (int) (val[t] / ths->eps_I);
400  }
401 
402  ths->box_alpha[box_index[ind]] = ths->alpha[l];
403 
404  for (t=0; t < ths->d; t++)
405  {
406  ths->box_x[ths->d * box_index[ind] + t] = ths->x[ths->d * l + t];
407  }
408  box_index[ind]++;
409  }
410  free(box_index);
411 }
412 
414 static inline double _Complex calc_SearchBox(int d, double *y, double *x, double _Complex *alpha, int start, int end_lt, const double _Complex *Add, const int Ad, int p, double a, const kernel k, const double *param, const unsigned flags)
415 {
416  double _Complex result = 0.0;
417 
418  int m, l;
419  double r;
420 
421  for (m = start; m < end_lt; m++)
422  {
423  if (d==1)
424  {
425  r = y[0]-x[m];
426  }
427  else
428  {
429  r=0.0;
430  for (l=0; l<d; l++)
431  r+=(y[l]-x[m*d+l])*(y[l]-x[m*d+l]);
432  r=sqrt(r);
433  }
434  if (fabs(r)<a)
435  {
436  result += alpha[m]*k(r,0,param); /* alpha*(kern-regkern) */
437  if (d==1)
438  {
439  if (flags & EXACT_NEARFIELD)
440  result -= alpha[m]*regkern1(k,r,p,param,a,1.0/16.0); /* exact value (in 1D) */
441  else
442  result -= alpha[m]*kubintkern1(r,Add,Ad,a); /* spline approximation */
443  }
444  else
445  {
446  if (flags & EXACT_NEARFIELD)
447  result -= alpha[m]*regkern(k,r,p,param,a,1.0/16.0); /* exact value (in dD) */
448  else
449 #if defined(NF_KUB)
450  result -= alpha[m]*kubintkern(r,Add,Ad,a); /* spline approximation */
451 #elif defined(NF_QUADR)
452  result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */
453 #elif defined(NF_LIN)
454  result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */
455 #else
456  #error define interpolation method
457 #endif
458  }
459  }
460  }
461  return result;
462 }
463 
465 static double _Complex SearchBox(double *y, fastsum_plan *ths)
466 {
467  double _Complex val = 0.0;
468  int t, l;
469  int y_multiind[ths->d];
470  int multiindex[ths->d];
471  int y_ind;
472 
473  for (t=0; t < ths->d; t++)
474  {
475  y_multiind[t] = ((y[t] + 0.25 - ths->eps_B/2.0) / ths->eps_I);
476  }
477 
478  if (ths->d==1)
479  {
480  for (y_ind = max_i(0, y_multiind[0]-1); y_ind < ths->box_count_per_dim && y_ind <= y_multiind[0]+1; y_ind++){
481  val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha, ths->box_offset[y_ind], ths->box_offset[y_ind+1], ths->Add, ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
482  }
483  }
484  else if (ths->d==2)
485  {
486  for (multiindex[0] = max_i(0, y_multiind[0]-1); multiindex[0] < ths->box_count_per_dim && multiindex[0] <= y_multiind[0]+1; multiindex[0]++)
487  for (multiindex[1] = max_i(0, y_multiind[1]-1); multiindex[1] < ths->box_count_per_dim && multiindex[1] <= y_multiind[1]+1; multiindex[1]++)
488  {
489  y_ind = (ths->box_count_per_dim * multiindex[0]) + multiindex[1];
490  val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha, ths->box_offset[y_ind], ths->box_offset[y_ind+1], ths->Add, ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
491  }
492  }
493  else if(ths->d==3)
494  {
495  for (multiindex[0] = max_i(0, y_multiind[0]-1); multiindex[0] < ths->box_count_per_dim && multiindex[0] <= y_multiind[0]+1; multiindex[0]++)
496  for (multiindex[1] = max_i(0, y_multiind[1]-1); multiindex[1] < ths->box_count_per_dim && multiindex[1] <= y_multiind[1]+1; multiindex[1]++)
497  for (multiindex[2] = max_i(0, y_multiind[2]-1); multiindex[2] < ths->box_count_per_dim && multiindex[2] <= y_multiind[2]+1; multiindex[2]++)
498  {
499  y_ind = ((ths->box_count_per_dim * multiindex[0]) + multiindex[1]) * ths->box_count_per_dim + multiindex[2];
500  val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha, ths->box_offset[y_ind], ths->box_offset[y_ind+1], ths->Add, ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
501  }
502  }
503  else {
504  exit(-1);
505  }
506  return val;
507 }
508 
510 void BuildTree(int d, int t, double *x, double _Complex *alpha, int N)
511 {
512  if (N>1)
513  {
514  int m=N/2;
515 
516  quicksort(d,t,x,alpha,N);
517 
518  BuildTree(d, (t+1)%d, x, alpha, m);
519  BuildTree(d, (t+1)%d, x+(m+1)*d, alpha+(m+1), N-m-1);
520  }
521 }
522 
524 double _Complex SearchTree(const int d, const int t, const double *x,
525  const double _Complex *alpha, const double *xmin, const double *xmax,
526  const int N, const kernel k, const double *param, const int Ad,
527  const double _Complex *Add, const int p, const unsigned flags)
528 {
529  int m=N/2;
530  double Min=xmin[t], Max=xmax[t], Median=x[m*d+t];
531  double a=fabs(Max-Min)/2;
532  int l;
533  int E=0;
534  double r;
535 
536  if (N==0)
537  return 0.0;
538  if (Min>Median)
539  return SearchTree(d,(t+1)%d,x+(m+1)*d,alpha+(m+1),xmin,xmax,N-m-1,k,param,Ad,Add,p,flags);
540  else if (Max<Median)
541  return SearchTree(d,(t+1)%d,x,alpha,xmin,xmax,m,k,param,Ad,Add,p,flags);
542  else
543  {
544  double _Complex result = 0.0;
545  E=0;
546 
547  for (l=0; l<d; l++)
548  {
549  if ( x[m*d+l]>xmin[l] && x[m*d+l]<xmax[l] )
550  E++;
551  }
552 
553  if (E==d)
554  {
555  if (d==1)
556  {
557  r = xmin[0]+a-x[m]; /* remember: xmin+a = y */
558  }
559  else
560  {
561  r=0.0;
562  for (l=0; l<d; l++)
563  r+=(xmin[l]+a-x[m*d+l])*(xmin[l]+a-x[m*d+l]); /* remember: xmin+a = y */
564  r=sqrt(r);
565  }
566  if (fabs(r)<a)
567  {
568  result += alpha[m]*k(r,0,param); /* alpha*(kern-regkern) */
569  if (d==1)
570  {
571  if (flags & EXACT_NEARFIELD)
572  result -= alpha[m]*regkern1(k,r,p,param,a,1.0/16.0); /* exact value (in 1D) */
573  else
574  result -= alpha[m]*kubintkern1(r,Add,Ad,a); /* spline approximation */
575  }
576  else
577  {
578  if (flags & EXACT_NEARFIELD)
579  result -= alpha[m]*regkern(k,r,p,param,a,1.0/16.0); /* exact value (in dD) */
580  else
581 #if defined(NF_KUB)
582  result -= alpha[m]*kubintkern(r,Add,Ad,a); /* spline approximation */
583 #elif defined(NF_QUADR)
584  result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */
585 #elif defined(NF_LIN)
586  result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */
587 #else
588  #error define interpolation method
589 #endif
590  }
591  }
592  }
593  result += SearchTree(d,(t+1)%d,x+(m+1)*d,alpha+(m+1),xmin,xmax,N-m-1,k,param,Ad,Add,p,flags)
594  + SearchTree(d,(t+1)%d,x,alpha,xmin,xmax,m,k,param,Ad,Add,p,flags);
595  return result;
596  }
597 }
598 
600 void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, kernel k, double *param, unsigned flags, int nn, int m, int p, double eps_I, double eps_B)
601 {
602  int t;
603  int N[d], n[d];
604  int n_total;
605  int sort_flags_trafo = 0;
606  int sort_flags_adjoint = 0;
607 #ifdef _OPENMP
608  int nthreads = nfft_get_omp_num_threads();
609 #endif
610 
611  if (d > 1)
612  {
613  sort_flags_trafo = NFFT_SORT_NODES;
614 #ifdef _OPENMP
615  sort_flags_adjoint = NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT;
616 #else
617  sort_flags_adjoint = NFFT_SORT_NODES;
618 #endif
619  }
620 
621  ths->d = d;
622 
623  ths->N_total = N_total;
624  ths->M_total = M_total;
625 
626  ths->x = (double *)nfft_malloc(d*N_total*(sizeof(double)));
627  ths->alpha = (double _Complex *)nfft_malloc(N_total*(sizeof(double _Complex)));
628 
629  ths->y = (double *)nfft_malloc(d*M_total*(sizeof(double)));
630  ths->f = (double _Complex *)nfft_malloc(M_total*(sizeof(double _Complex)));
631 
632  ths->k = k;
633  ths->kernel_param = param;
634 
635  ths->flags = flags;
636 
637  ths->p = p;
638  ths->eps_I = eps_I; /* =(double)ths->p/(double)nn; */
639  ths->eps_B = eps_B; /* =1.0/16.0; */
642  if (!(ths->flags & EXACT_NEARFIELD))
643  {
644  if (ths->d==1)
645  {
646  ths->Ad = 4*(ths->p)*(ths->p);
647  ths->Add = (double _Complex *)nfft_malloc((ths->Ad+5)*(sizeof(double _Complex)));
648  }
649  else
650  {
651  if (ths->k == one_over_x)
652  {
653  double delta = 1e-8;
654  switch(p)
655  {
656  case 2: delta = 1e-3;
657  break;
658  case 3: delta = 1e-4;
659  break;
660  case 4: delta = 1e-5;
661  break;
662  case 5: delta = 1e-6;
663  break;
664  case 6: delta = 1e-6;
665  break;
666  case 7: delta = 1e-7;
667  break;
668  default: delta = 1e-8;
669  }
670 
671 #if defined(NF_KUB)
672  ths->Ad = max_i(10, (int) ceil(1.4/pow(delta,1.0/4.0)));
673  ths->Add = (double _Complex *)nfft_malloc((ths->Ad+3)*(sizeof(double _Complex)));
674 #elif defined(NF_QUADR)
675  ths->Ad = (int) ceil(2.2/pow(delta,1.0/3.0));
676  ths->Add = (double _Complex *)nfft_malloc((ths->Ad+3)*(sizeof(double _Complex)));
677 #elif defined(NF_LIN)
678  ths->Ad = (int) ceil(1.7/pow(delta,1.0/2.0));
679  ths->Add = (double _Complex *)nfft_malloc((ths->Ad+3)*(sizeof(double _Complex)));
680 #else
681  #error define NF_LIN or NF_QUADR or NF_KUB
682 #endif
683  }
684  else
685  {
686  ths->Ad = 2*(ths->p)*(ths->p);
687  ths->Add = (double _Complex *)nfft_malloc((ths->Ad+3)*(sizeof(double _Complex)));
688  }
689  }
690  }
691 
693  ths->n = nn;
694  for (t=0; t<d; t++)
695  {
696  N[t] = nn;
697  n[t] = 2*nn;
698  }
699  nfft_init_guru(&(ths->mv1), d, N, N_total, n, m,
700  sort_flags_adjoint |
701  PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
702  FFTW_MEASURE| FFTW_DESTROY_INPUT);
703  nfft_init_guru(&(ths->mv2), d, N, M_total, n, m,
704  sort_flags_trafo |
705  PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
706  FFTW_MEASURE| FFTW_DESTROY_INPUT);
707 
709  n_total = 1;
710  for (t=0; t<d; t++)
711  n_total *= nn;
712 
713  ths->b = (fftw_complex *)nfft_malloc(n_total*sizeof(fftw_complex));
714 #ifdef _OPENMP
715 #pragma omp critical (nfft_omp_critical_fftw_plan)
716 {
717  fftw_plan_with_nthreads(nthreads);
718 #endif
719 
720  ths->fft_plan = fftw_plan_dft(d,N,ths->b,ths->b,FFTW_FORWARD,FFTW_ESTIMATE);
721 
722 #ifdef _OPENMP
723 }
724 #endif
725 
726  if (ths->flags & NEARFIELD_BOXES)
727  {
728  ths->box_count_per_dim = floor((0.5 - ths->eps_B) / ths->eps_I) + 1;
729  ths->box_count = 1;
730  for (t=0; t<ths->d; t++)
731  ths->box_count *= ths->box_count_per_dim;
732 
733  ths->box_offset = (int *) nfft_malloc((ths->box_count+1) * sizeof(int));
734 
735  ths->box_alpha = (double _Complex *)nfft_malloc(ths->N_total*(sizeof(double _Complex)));
736 
737  ths->box_x = (double *) nfft_malloc(ths->d * ths->N_total * sizeof(double));
738  }
739 }
740 
743 {
744  nfft_free(ths->x);
745  nfft_free(ths->alpha);
746  nfft_free(ths->y);
747  nfft_free(ths->f);
748 
749  if (!(ths->flags & EXACT_NEARFIELD))
750  nfft_free(ths->Add);
751 
752  nfft_finalize(&(ths->mv1));
753  nfft_finalize(&(ths->mv2));
754 
755 #ifdef _OPENMP
756 #pragma omp critical (nfft_omp_critical_fftw_plan)
757 {
758 #endif
759  fftw_destroy_plan(ths->fft_plan);
760 #ifdef _OPENMP
761 }
762 #endif
763 
764  nfft_free(ths->b);
765 
766  if (ths->flags & NEARFIELD_BOXES)
767  {
768  nfft_free(ths->box_offset);
769  nfft_free(ths->box_alpha);
770  nfft_free(ths->box_x);
771  }
772 }
773 
776 {
777  int j,k;
778  int t;
779  double r;
780 
781  #pragma omp parallel for default(shared) private(j,k,t,r)
782  for (j=0; j<ths->M_total; j++)
783  {
784  ths->f[j]=0.0;
785  for (k=0; k<ths->N_total; k++)
786  {
787  if (ths->d==1)
788  r = ths->y[j] - ths->x[k];
789  else
790  {
791  r=0.0;
792  for (t=0; t<ths->d; t++)
793  r += (ths->y[j*ths->d+t]-ths->x[k*ths->d+t])*(ths->y[j*ths->d+t]-ths->x[k*ths->d+t]);
794  r=sqrt(r);
795  }
796  ths->f[j] += ths->alpha[k] * ths->k(r,0,ths->kernel_param);
797  }
798  }
799 }
800 
803 {
804  int j,k,t;
805  int n_total;
806  ticks t0, t1;
807 
808  ths->MEASURE_TIME_t[0] = 0.0;
809  ths->MEASURE_TIME_t[1] = 0.0;
810  ths->MEASURE_TIME_t[2] = 0.0;
811  ths->MEASURE_TIME_t[3] = 0.0;
812 
813 #ifdef MEASURE_TIME
814  t0 = getticks();
815 #endif
816 
817 
818  if (ths->flags & NEARFIELD_BOXES)
819  {
820  BuildBox(ths);
821  }
822  else
823  {
825  BuildTree(ths->d,0,ths->x,ths->alpha,ths->N_total);
826  }
827 
828 #ifdef MEASURE_TIME
829  t1 = getticks();
830  ths->MEASURE_TIME_t[3] += nfft_elapsed_seconds(t1,t0);
831 #endif
832 
833 
834 #ifdef MEASURE_TIME
835  t0 = getticks();
836 #endif
837 
838  if (!(ths->flags & EXACT_NEARFIELD))
839  {
840  if (ths->d==1)
841  #pragma omp parallel for default(shared) private(k)
842  for (k=-ths->Ad/2-2; k <= ths->Ad/2+2; k++)
843  ths->Add[k+ths->Ad/2+2] = regkern1(ths->k, ths->eps_I*(double)k/ths->Ad*2, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B);
844  else
845  #pragma omp parallel for default(shared) private(k)
846  for (k=0; k <= ths->Ad+2; k++)
847  ths->Add[k] = regkern3(ths->k, ths->eps_I*(double)k/ths->Ad, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B);
848  }
849 #ifdef MEASURE_TIME
850  t1 = getticks();
851  ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
852 #endif
853 
854 
855 #ifdef MEASURE_TIME
856  t0 = getticks();
857 #endif
858 
859  for (k=0; k<ths->mv1.M_total; k++)
860  for (t=0; t<ths->mv1.d; t++)
861  ths->mv1.x[ths->mv1.d*k+t] = - ths->x[ths->mv1.d*k+t]; /* note the factor -1 for transposed transform instead of adjoint*/
862 
864  if(ths->mv1.nfft_flags & PRE_LIN_PSI)
865  nfft_precompute_lin_psi(&(ths->mv1));
866 
867  if(ths->mv1.nfft_flags & PRE_PSI)
868  nfft_precompute_psi(&(ths->mv1));
869 
870  if(ths->mv1.nfft_flags & PRE_FULL_PSI)
871  nfft_precompute_full_psi(&(ths->mv1));
872 #ifdef MEASURE_TIME
873  t1 = getticks();
874  ths->MEASURE_TIME_t[1] += nfft_elapsed_seconds(t1,t0);
875 #endif
876 
878  for(k=0; k<ths->mv1.M_total;k++)
879  ths->mv1.f[k] = ths->alpha[k];
880 
881 #ifdef MEASURE_TIME
882  t0 = getticks();
883 #endif
884 
885  for (j=0; j<ths->mv2.M_total; j++)
886  for (t=0; t<ths->mv2.d; t++)
887  ths->mv2.x[ths->mv2.d*j+t] = - ths->y[ths->mv2.d*j+t]; /* note the factor -1 for conjugated transform instead of standard*/
888 
890  if(ths->mv2.nfft_flags & PRE_LIN_PSI)
891  nfft_precompute_lin_psi(&(ths->mv2));
892 
893  if(ths->mv2.nfft_flags & PRE_PSI)
894  nfft_precompute_psi(&(ths->mv2));
895 
896  if(ths->mv2.nfft_flags & PRE_FULL_PSI)
897  nfft_precompute_full_psi(&(ths->mv2));
898 #ifdef MEASURE_TIME
899  t1 = getticks();
900  ths->MEASURE_TIME_t[2] += nfft_elapsed_seconds(t1,t0);
901 #endif
902 
903 
904 #ifdef MEASURE_TIME
905  t0 = getticks();
906 #endif
907 
908  n_total = 1;
909  for (t=0; t<ths->d; t++)
910  n_total *= ths->n;
911 
912  #pragma omp parallel for default(shared) private(j,k,t)
913  for (j=0; j<n_total; j++)
914  {
915  if (ths->d==1)
916  ths->b[j] = regkern1(ths->k, (double)j / (ths->n) - 0.5, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B)/n_total;
917  else
918  {
919  k=j;
920  ths->b[j]=0.0;
921  for (t=0; t<ths->d; t++)
922  {
923  ths->b[j] += ((double)(k % (ths->n)) / (ths->n) - 0.5) * ((double)(k % (ths->n)) / (ths->n) - 0.5);
924  k = k / (ths->n);
925  }
926  ths->b[j] = regkern3(ths->k, sqrt(ths->b[j]), ths->p, ths->kernel_param, ths->eps_I, ths->eps_B)/n_total;
927  }
928  }
929 
930  nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
931  fftw_execute(ths->fft_plan);
932  nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
933 #ifdef MEASURE_TIME
934  t1 = getticks();
935  ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
936 #endif
937 }
938 
941 {
942  int j,k,t;
943  ticks t0, t1;
944 
945  ths->MEASURE_TIME_t[4] = 0.0;
946  ths->MEASURE_TIME_t[5] = 0.0;
947  ths->MEASURE_TIME_t[6] = 0.0;
948  ths->MEASURE_TIME_t[7] = 0.0;
949 
950 #ifdef MEASURE_TIME
951  t0 = getticks();
952 #endif
953 
954  nfft_adjoint(&(ths->mv1));
955 #ifdef MEASURE_TIME
956  t1 = getticks();
957  ths->MEASURE_TIME_t[4] += nfft_elapsed_seconds(t1,t0);
958 #endif
959 
960 
961 #ifdef MEASURE_TIME
962  t0 = getticks();
963 #endif
964 
965  #pragma omp parallel for default(shared) private(k)
966  for (k=0; k<ths->mv2.N_total; k++)
967  ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k];
968 #ifdef MEASURE_TIME
969  t1 = getticks();
970  ths->MEASURE_TIME_t[5] += nfft_elapsed_seconds(t1,t0);
971 #endif
972 
973 
974 #ifdef MEASURE_TIME
975  t0 = getticks();
976 #endif
977 
978  nfft_trafo(&(ths->mv2));
979 #ifdef MEASURE_TIME
980  t1 = getticks();
981  ths->MEASURE_TIME_t[6] += nfft_elapsed_seconds(t1,t0);
982 #endif
983 
984 
985 #ifdef MEASURE_TIME
986  t0 = getticks();
987 #endif
988 
989  #pragma omp parallel for default(shared) private(j,k,t)
990  for (j=0; j<ths->M_total; j++)
991  {
992  double ymin[ths->d], ymax[ths->d];
994  if (ths->flags & NEARFIELD_BOXES)
995  {
996  ths->f[j] = ths->mv2.f[j] + SearchBox(ths->y + ths->d*j, ths);
997  }
998  else
999  {
1000  for (t=0; t<ths->d; t++)
1001  {
1002  ymin[t] = ths->y[ths->d*j+t] - ths->eps_I;
1003  ymax[t] = ths->y[ths->d*j+t] + ths->eps_I;
1004  }
1005  ths->f[j] = ths->mv2.f[j] + SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags);
1006  }
1007  /* ths->f[j] = ths->mv2.f[j]; */
1008  /* ths->f[j] = SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags); */
1009  }
1010 
1011 #ifdef MEASURE_TIME
1012  t1 = getticks();
1013  ths->MEASURE_TIME_t[7] += nfft_elapsed_seconds(t1,t0);
1014 #endif
1015 }
1016 /* \} */
1017 
1018 /* fastsum.c */

Generated on Tue Apr 30 2013 by Doxygen 1.8.1