NFFT Logo 3.2.3
nfsft_benchomp_detail.c
1 /*
2  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id: simple_test.c 3372 2009-10-21 06:04:05Z skunis $ */
20 
21 #include <stdio.h>
22 #include <math.h>
23 #include <string.h>
24 #include <stdlib.h>
25 #include <complex.h>
26 
27 #include "nfft3util.h"
28 #include "nfft3.h"
29 #include "infft.h"
30 #ifdef _OPENMP
31 #include <omp.h>
32 #endif
33 
34 void bench_openmp_readfile(FILE *infile, int *trafo_adjoint, int *N, int *M, double **x, C **f_hat, C **f)
35 {
36  double re,im;
37  int k, n, j, t;
38  nfsft_plan plan;
39 
40  fscanf(infile, "%d %d %d", trafo_adjoint, N, M);
41  *x = (double *)nfft_malloc(2*(*M)*sizeof(double));
42  *f_hat = (C*)nfft_malloc((2*(*N)+2) * (2*(*N)+2) * sizeof(C));
43  *f = (C*)nfft_malloc((*M)*sizeof(C));
44 
45  memset(*f_hat,0U,(2*(*N)+2) * (2*(*N)+2) * sizeof(C));
46  memset(*f,0U,(*M)*sizeof(C));
47 
48 #ifdef _OPENMP
49  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
50 #else
51  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
52 #endif
53 
54  nfsft_init_guru(&plan, *N, *M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
55  NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
56  PRE_PHI_HUT | FFTW_INIT | FFT_OUT_OF_PLACE, 6);
57 
58 #ifdef _OPENMP
59  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
60 #else
61  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
62 #endif
63 
64  for (j=0; j < *M; j++)
65  for (t=0; t < 2; t++)
66  fscanf(infile, "%lg", (*x)+2*j+t);
67 
68  if (trafo_adjoint==0)
69  {
70  for (k = 0; k <= *N; k++)
71  for (n = -k; n <= k; n++)
72  {
73  fscanf(infile, "%lg %lg", &re, &im);
74  (*f_hat)[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
75  }
76  }
77  else
78  {
79  for (j=0; j < *M; j++)
80  {
81  fscanf(infile, "%lg %lg", &re, &im);
82  (*f)[j] = re + _Complex_I * im;
83  }
84  }
85 
86  nfsft_finalize(&plan);
87 }
88 
89 void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
90 {
91  nfsft_plan plan;
92  int k, n;
93 // int N, M, trafo_adjoint;
94  int t, j;
95  ticks t0, t1;
96  double tt_total, tt_pre;
97 
98 // fscanf(infile, "%d %d %d", &trafo_adjoint, &N, &M);
99 
100 /*#ifdef _OPENMP
101  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
102 #else
103  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
104 #endif*/
105 
106  /* precomputation (for fast polynomial transform) */
107 // nfsft_precompute(N,1000.0,0U,0U);
108 
109  /* Initialize transform plan using the guru interface. All input and output
110  * arrays are allocated by nfsft_init_guru(). Computations are performed with
111  * respect to L^2-normalized spherical harmonics Y_k^n. The array of spherical
112  * Fourier coefficients is preserved during transformations. The NFFT uses a
113  * cut-off parameter m = 6. See the NFFT 3 manual for details.
114  */
115  nfsft_init_guru(&plan, N, M, nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F |
116  NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
117  PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE, m);
118 
119 /*#ifdef _OPENMP
120  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
121 #else
122  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
123 #endif*/
124 
125  for (j=0; j < plan.M_total; j++)
126  {
127  for (t=0; t < 2; t++)
128  // fscanf(infile, "%lg", plan.x+2*j+t);
129  plan.x[2*j+t] = x[2*j+t];
130  }
131 
132  if (trafo_adjoint==0)
133  {
134  memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
135  for (k = 0; k <= plan.N; k++)
136  for (n = -k; n <= k; n++)
137  {
138 // fscanf(infile, "%lg %lg", &re, &im);
139 // plan.f_hat[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
140  plan.f_hat[NFSFT_INDEX(k,n,&plan)] = f_hat[NFSFT_INDEX(k,n,&plan)];
141  }
142  }
143  else
144  {
145  for (j=0; j < plan.M_total; j++)
146  {
147 // fscanf(infile, "%lg %lg", &re, &im);
148 // plan.f[j] = re + _Complex_I * im;
149  plan.f[j] = f[j];
150  }
151 
152  memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
153  }
154 
155  t0 = getticks();
156  /* precomputation (for NFFT, node-dependent) */
157  nfsft_precompute_x(&plan);
158  t1 = getticks();
159  tt_pre = nfft_elapsed_seconds(t1,t0);
160 
161  if (trafo_adjoint==0)
162  nfsft_trafo(&plan);
163  else
164  nfsft_adjoint(&plan);
165  t1 = getticks();
166  tt_total = nfft_elapsed_seconds(t1,t0);
167 
168 #ifndef MEASURE_TIME
169  plan.MEASURE_TIME_t[0] = 0.0;
170  plan.MEASURE_TIME_t[2] = 0.0;
171 #endif
172 
173 #ifndef MEASURE_TIME_FFTW
174  plan.MEASURE_TIME_t[1] = 0.0;
175 #endif
176 
177  printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_pre, plan.MEASURE_TIME_t[0], plan.MEASURE_TIME_t[1], plan.MEASURE_TIME_t[2], tt_total-tt_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2], tt_total);
178 
180  nfsft_finalize(&plan);
181 }
182 
183 int main(int argc, char **argv)
184 {
185  int m, nfsft_flags, psi_flags;
186  int nrepeat;
187  int trafo_adjoint, N, M, r;
188  double *x;
189  C *f_hat, *f;
190 #ifdef _OPENMP
191  int nthreads;
192 
193  if (argc != 6)
194  return 1;
195 
196  nthreads = atoi(argv[5]);
197  fftw_init_threads();
198  omp_set_num_threads(nthreads);
199 #else
200  if (argc != 5)
201  return 1;
202 #endif
203 
204  m = atoi(argv[1]);
205  nfsft_flags = atoi(argv[2]);
206  psi_flags = atoi(argv[3]);
207  nrepeat = atoi(argv[4]);
208 
209  bench_openmp_readfile(stdin, &trafo_adjoint, &N, &M, &x, &f_hat, &f);
210 
211  /* precomputation (for fast polynomial transform) */
212  nfsft_precompute(N,1000.0,0U,0U);
213 
214  for (r = 0; r < nrepeat; r++)
215  bench_openmp(trafo_adjoint, N, M, x, f_hat, f, m, nfsft_flags, psi_flags);
216 
217  return 0;
218 }

Generated on Tue Apr 30 2013 by Doxygen 1.8.1