NFFT Logo 3.2.3
fastgauss.c
1 /*
2  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id: fastgauss.c 3896 2012-10-10 12:19:26Z tovo $ */
20 
27 #include "config.h"
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <math.h>
32 #ifdef HAVE_COMPLEX_H
33  #include <complex.h>
34 #endif
35 
36 #include "nfft3.h"
37 #include "nfft3util.h"
38 #include "infft.h"
39 
48 #define DGT_PRE_CEXP (1U<< 0)
49 
59 #define FGT_NDFT (1U<< 1)
60 
69 #define FGT_APPROX_B (1U<< 2)
70 
72 typedef struct
73 {
74  int N;
75  int M;
77  double _Complex *alpha;
78  double _Complex *f;
80  unsigned flags;
83  double _Complex sigma;
85  double *x;
86  double *y;
88  double _Complex *pre_cexp;
90  int n;
91  double p;
93  double _Complex *b;
98 } fgt_plan;
99 
107 void dgt_trafo(fgt_plan *ths)
108 {
109  int j,k,l;
110 
111  for(j=0; j<ths->M; j++)
112  ths->f[j] = 0;
113 
114  if(ths->flags & DGT_PRE_CEXP)
115  for(j=0,l=0; j<ths->M; j++)
116  for(k=0; k<ths->N; k++,l++)
117  ths->f[j] += ths->alpha[k]*ths->pre_cexp[l];
118  else
119  for(j=0; j<ths->M; j++)
120  for(k=0; k<ths->N; k++)
121  ths->f[j] += ths->alpha[k]*cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
122  (ths->y[j]-ths->x[k]));
123 }
124 
132 void fgt_trafo(fgt_plan *ths)
133 {
134  int l;
135 
136  if(ths->flags & FGT_NDFT)
137  {
138  nfft_adjoint_direct(ths->nplan1);
139 
140  for(l=0; l<ths->n; l++)
141  ths->nplan1->f_hat[l] *= ths->b[l];
142 
143  nfft_trafo_direct(ths->nplan2);
144  }
145  else
146  {
147  nfft_adjoint(ths->nplan1);
148 
149  for(l=0; l<ths->n; l++)
150  ths->nplan1->f_hat[l] *= ths->b[l];
151 
152  nfft_trafo(ths->nplan2);
153  }
154 }
155 
170 void fgt_init_guru(fgt_plan *ths, int N, int M, double _Complex sigma, int n,
171  double p, int m, unsigned flags)
172 {
173  int j,n_fftw;
174  fftw_plan fplan;
175 
176  ths->M = M;
177  ths->N = N;
178  ths->sigma = sigma;
179  ths->flags = flags;
180 
181  ths->x = (double*)nfft_malloc(ths->N*sizeof(double));
182  ths->y = (double*)nfft_malloc(ths->M*sizeof(double));
183  ths->alpha = (double _Complex*)nfft_malloc(ths->N*sizeof(double _Complex));
184  ths->f = (double _Complex*)nfft_malloc(ths->M*sizeof(double _Complex));
185 
186  ths->n = n;
187  ths->p = p;
188 
189  ths->b = (double _Complex*)nfft_malloc(ths->n*sizeof(double _Complex));
190 
191  ths->nplan1 = (nfft_plan*) nfft_malloc(sizeof(nfft_plan));
192  ths->nplan2 = (nfft_plan*) nfft_malloc(sizeof(nfft_plan));
193 
194  n_fftw=X(next_power_of_2)(2*ths->n);
195 
196  nfft_init_guru(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT|
197  PRE_PSI| MALLOC_X| MALLOC_F_HAT| FFTW_INIT, FFTW_MEASURE);
198  nfft_init_guru(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT|
199  PRE_PSI| MALLOC_X| FFTW_INIT, FFTW_MEASURE);
200 
201  ths->nplan1->f = ths->alpha;
202  ths->nplan2->f_hat = ths->nplan1->f_hat;
203  ths->nplan2->f = ths->f;
204 
205  if(ths->flags & FGT_APPROX_B)
206  {
207  fplan = fftw_plan_dft_1d(ths->n, ths->b, ths->b, FFTW_FORWARD,
208  FFTW_MEASURE);
209 
210  for(j=0; j<ths->n; j++)
211  ths->b[j] = cexp(-ths->p*ths->p*ths->sigma*(j-ths->n/2)*(j-ths->n/2)/
212  ((double)ths->n*ths->n)) / ths->n;
213 
214  nfft_fftshift_complex(ths->b, 1, &ths->n);
215  fftw_execute(fplan);
216  nfft_fftshift_complex(ths->b, 1, &ths->n);
217 
218  fftw_destroy_plan(fplan);
219  }
220  else
221  {
222  for(j=0; j<ths->n; j++)
223  ths->b[j] = 1.0/ths->p * csqrt(PI/ths->sigma)*
224  cexp(-PI*PI*(j-ths->n/2)*(j-ths->n/2)/
225  (ths->p*ths->p*ths->sigma));
226  }
227 }
228 
240 void fgt_init(fgt_plan *ths, int N, int M, double _Complex sigma, double eps)
241 {
242  double p;
243  int n;
244 
245  p=0.5+sqrt(-log(eps)/creal(sigma));
246  if(p<1)
247  p=1;
248 
249  n=2*((int)ceil(p*cabs(sigma)/PI * sqrt(-log(eps)/creal(sigma))));
250 
251  if(N*M<=((int)(1U<<20)))
252  fgt_init_guru(ths, N, M, sigma, n, p, 7, DGT_PRE_CEXP);
253  else
254  fgt_init_guru(ths, N, M, sigma, n, p, 7, 0);
255 }
256 
264 {
265  int j,k,l;
266 
267  if(ths->flags & DGT_PRE_CEXP)
268  {
269  ths->pre_cexp=(double _Complex*)nfft_malloc(ths->M*ths->N*
270  sizeof(double _Complex));
271 
272  for(j=0,l=0; j<ths->M; j++)
273  for(k=0; k<ths->N; k++,l++)
274  ths->pre_cexp[l]=cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
275  (ths->y[j]-ths->x[k]));
276  }
277 
278  for(j=0; j<ths->nplan1->M_total; j++)
279  ths->nplan1->x[j] = ths->x[j]/ths->p;
280  for(j=0; j<ths->nplan2->M_total; j++)
281  ths->nplan2->x[j] = ths->y[j]/ths->p;
282 
283  if(ths->nplan1->nfft_flags & PRE_PSI)
284  nfft_precompute_psi(ths->nplan1);
285  if(ths->nplan2->nfft_flags & PRE_PSI)
286  nfft_precompute_psi(ths->nplan2);
287 }
288 
296 {
297  nfft_finalize(ths->nplan2);
298  nfft_finalize(ths->nplan1);
299 
300  nfft_free(ths->nplan2);
301  nfft_free(ths->nplan1);
302 
303  nfft_free(ths->b);
304 
305  nfft_free(ths->f);
306  nfft_free(ths->y);
307 
308  nfft_free(ths->alpha);
309  nfft_free(ths->x);
310 }
311 
319 {
320  int j,k;
321 
322  for(k=0; k<ths->N; k++)
323  ths->x[k] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
324 
325  for(j=0; j<ths->M; j++)
326  ths->y[j] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
327 
328  for(k=0; k<ths->N; k++)
329  ths->alpha[k] = (double)rand()/(RAND_MAX)-1.0/2.0
330  + _Complex_I*(double)rand()/(RAND_MAX)-I/2.0;
331 }
332 
341 double fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
342 {
343  int r;
344  ticks t0, t1;
345  double t_out;
346  double tau=0.01;
347 
348  t_out=0;
349  r=0;
350  while(t_out<tau)
351  {
352  r++;
353  t0 = getticks();
354  if (dgt)
355  dgt_trafo(ths);
356  else
357  fgt_trafo(ths);
358  t1 = getticks();
359  t_out += nfft_elapsed_seconds(t1,t0);
360  }
361  t_out/=r;
362 
363  return t_out;
364 }
365 
375 void fgt_test_simple(int N, int M, double _Complex sigma, double eps)
376 {
377  fgt_plan my_plan;
378  double _Complex *swap_dgt;
379 
380  fgt_init(&my_plan, N, M, sigma, eps);
381  swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*sizeof(double _Complex));
382 
383  fgt_test_init_rand(&my_plan);
384  fgt_init_node_dependent(&my_plan);
385 
386  NFFT_SWAP_complex(swap_dgt,my_plan.f);
387  dgt_trafo(&my_plan);
388  nfft_vpr_complex(my_plan.f,my_plan.M,"discrete gauss transform");
389  NFFT_SWAP_complex(swap_dgt,my_plan.f);
390 
391  fgt_trafo(&my_plan);
392  nfft_vpr_complex(my_plan.f,my_plan.M,"fast gauss transform");
393 
394  printf("\n relative error: %1.3e\n", X(error_l_infty_1_complex)(swap_dgt,
395  my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
396 
397  nfft_free(swap_dgt);
398  fgt_finalize(&my_plan);
399 }
400 
411 {
412  fgt_plan my_plan;
413  double _Complex *swap_dgt;
414  int N;
415 
416  double _Complex sigma=4*(138+ _Complex_I*100);
417  int n=128;
418  int N_dgt_pre_exp=(int)(1U<<11);
419  int N_dgt=(int)(1U<<19);
420 
421  printf("n=%d, sigma=%1.3e+i%1.3e\n",n,creal(sigma),cimag(sigma));
422 
423  for(N=((int)(1U<<6)); N<((int)(1U<<22)); N=N<<1)
424  {
425  printf("$%d$\t & ",N);
426 
427  if(N<N_dgt_pre_exp)
428  fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, DGT_PRE_CEXP);
429  else
430  fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, 0);
431 
432  swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*
433  sizeof(double _Complex));
434 
435  fgt_test_init_rand(&my_plan);
436 
437  fgt_init_node_dependent(&my_plan);
438 
439  if(N<N_dgt)
440  {
441  NFFT_SWAP_complex(swap_dgt,my_plan.f);
442  if(N<N_dgt_pre_exp)
443  my_plan.flags^=DGT_PRE_CEXP;
444 
445  printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
446  if(N<N_dgt_pre_exp)
447  my_plan.flags^=DGT_PRE_CEXP;
448  NFFT_SWAP_complex(swap_dgt,my_plan.f);
449  }
450  else
451  printf("\t\t & ");
452 
453  if(N<N_dgt_pre_exp)
454  printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
455  else
456  printf("\t\t & ");
457 
458  my_plan.flags^=FGT_NDFT;
459  printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
460  my_plan.flags^=FGT_NDFT;
461 
462  printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
463 
464  printf("$%1.1e$\t \\\\ \n",
465  X(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M,
466  my_plan.alpha, my_plan.N));
467  fflush(stdout);
468 
469  nfft_free(swap_dgt);
470  fgt_finalize(&my_plan);
471  fftw_cleanup();
472  }
473 }
474 
481 void fgt_test_error(void)
482 {
483  fgt_plan my_plan;
484  double _Complex *swap_dgt;
485  int n,mi;
486 
487  double _Complex sigma=4*(138+ _Complex_I*100);
488  int N=1000;
489  int M=1000;
490  int m[2]={7,3};
491 
492  printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
493  printf("error=[\n");
494 
495  swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex));
496 
497  for(n=8; n<=128; n+=4)
498  {
499  printf("%d\t",n);
500  for(mi=0;mi<2;mi++)
501  {
502  fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);
503  fgt_test_init_rand(&my_plan);
504  fgt_init_node_dependent(&my_plan);
505 
506  NFFT_SWAP_complex(swap_dgt,my_plan.f);
507  dgt_trafo(&my_plan);
508  NFFT_SWAP_complex(swap_dgt,my_plan.f);
509 
510  fgt_trafo(&my_plan);
511 
512  printf("%1.3e\t", X(error_l_infty_1_complex)(swap_dgt, my_plan.f,
513  my_plan.M, my_plan.alpha, my_plan.N));
514  fflush(stdout);
515 
516  fgt_finalize(&my_plan);
517  fftw_cleanup();
518  }
519  printf("\n");
520  }
521  printf("];\n");
522 
523  nfft_free(swap_dgt);
524 }
525 
533 {
534  fgt_plan my_plan;
535  double _Complex *swap_dgt;
536  int n,pi;
537 
538  double _Complex sigma=20+40*_Complex_I;
539  int N=1000;
540  int M=1000;
541  double p[3]={1,1.5,2};
542 
543  printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
544  printf("error=[\n");
545 
546  swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex));
547 
548  for(n=8; n<=128; n+=4)
549  {
550  printf("%d\t",n);
551  for(pi=0;pi<3;pi++)
552  {
553  fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);
554  fgt_test_init_rand(&my_plan);
555  fgt_init_node_dependent(&my_plan);
556 
557  NFFT_SWAP_complex(swap_dgt,my_plan.f);
558  dgt_trafo(&my_plan);
559  NFFT_SWAP_complex(swap_dgt,my_plan.f);
560 
561  fgt_trafo(&my_plan);
562 
563  printf("%1.3e\t", X(error_l_infty_1_complex)(swap_dgt, my_plan.f,
564  my_plan.M, my_plan.alpha, my_plan.N));
565  fflush(stdout);
566 
567  fgt_finalize(&my_plan);
568  fftw_cleanup();
569  }
570  printf("\n");
571  }
572  printf("];\n");
573 }
574 
580 int main(int argc,char **argv)
581 {
582  if(argc!=2)
583  {
584  fprintf(stderr,"fastgauss type\n");
585  fprintf(stderr," type\n");
586  fprintf(stderr," 0 - Simple test.\n");
587  fprintf(stderr," 1 - Compares accuracy and execution time.\n");
588  fprintf(stderr," Pipe to output_andersson.tex\n");
589  fprintf(stderr," 2 - Compares accuracy.\n");
590  fprintf(stderr," Pipe to output_error.m\n");
591  fprintf(stderr," 3 - Compares accuracy.\n");
592  fprintf(stderr," Pipe to output_error_p.m\n");
593  return -1;
594  }
595 
596  if(atoi(argv[1])==0)
597  fgt_test_simple(10, 10, 5+3*_Complex_I, 0.001);
598 
599  if(atoi(argv[1])==1)
601 
602  if(atoi(argv[1])==2)
603  fgt_test_error();
604 
605  if(atoi(argv[1])==3)
607 
608  return 1;
609 }
610 /* \} */

Generated on Tue Apr 30 2013 by Doxygen 1.8.1