NFFT Logo 3.2.3
nfsft_benchomp.c
1 /*
2  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <unistd.h>
22 
23 #include "config.h"
24 
25 #include <nfft3.h>
26 #include <nfft3util.h>
27 
/* Number of repetitions requested from every benchmark invocation. */
#define NREPEAT 5

/* Stream that receives the generated TikZ/LaTeX plots; opened in main(). */
static FILE *file_out_tex = NULL;
31 
/*
 * Allocates room for n thread counts and terminates the program with exit(1)
 * when the allocation fails. n must be positive.
 */
static int *alloc_nthreads_list(int n)
{
  int *list = malloc((size_t) n * sizeof *list);

  if (list == NULL)
  {
    fprintf(stderr, "ERROR: cannot allocate list of %d thread counts\n", n);
    exit(1);
  }

  return list;
}

/*
 * Builds the ascending list of thread counts the benchmark is run with.
 *
 * The upper bound max is the value reported by nfft_get_omp_num_threads().
 *  - max < 1:  the list is empty, *arr is set to NULL and 0 is returned.
 *  - max <= 5: every count 1, 2, ..., max is used.
 *  - max > 5:  all powers of two not exceeding max are used. If max is not a
 *              power of two, max itself is appended and, for even max, max/2
 *              is inserted in front of the largest power of two
 *              (e.g. max = 12 gives 1 2 4 6 8 12).
 *
 * arr      receives a malloc'ed array with the thread counts; the caller
 *          owns it and releases it with free().
 * returns  the number of entries stored in *arr.
 */
int get_nthreads_array(int **arr)
{
  const int max_threads = nfft_get_omp_num_threads();
  const int max_is_even = (max_threads % 2 == 0);
  int capacity = 2; /* room for the optional max/2 and max entries */
  int count = 0;
  int *list;
  int k;

  if (max_threads < 1)
  {
    *arr = NULL;
    return 0;
  }

  if (max_threads <= 5)
  {
    list = alloc_nthreads_list(max_threads);
    for (k = 0; k < max_threads; k++)
      list[k] = k + 1;
    *arr = list;
    return max_threads;
  }

  /* One slot per power of two that does not exceed max_threads. */
  for (k = 1; k <= max_threads; k *= 2)
    capacity++;

  list = alloc_nthreads_list(capacity);

  for (k = 1; k <= max_threads; k *= 2)
  {
    /* True for the largest power of two below a non-power-of-two maximum. */
    const int is_last_power = (k != max_threads && 2*k > max_threads);

    if (is_last_power && max_is_even)
      list[count++] = max_threads/2;

    list[count++] = k;

    if (is_last_power)
    {
      list[count++] = max_threads;
      break;
    }
  }

  *arr = list;
  return count;
}
73 
74 
/*
 * Verifies that val equals the expected value ok.
 *
 * On a mismatch an error line containing msg and both values is written to
 * stderr and the program is terminated with exit status 1. Returns normally
 * otherwise.
 */
void check_result_value(const int val, const int ok, const char *msg)
{
  if (val == ok)
    return;

  fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
  exit(1);
}
84 
/*
 * Runs ./nfsft_benchomp_createdataset with the arguments trafo_adjoint, N and
 * M and redirects its output to nfsft_benchomp_test.data.
 *
 * The command line is echoed to stderr first. The program is terminated via
 * check_result_value() when system() does not return 0.
 */
void run_test_create(int trafo_adjoint, int N, int M)
{
  char cmd[1025];
  int status;

  snprintf(cmd, 1024, "./nfsft_benchomp_createdataset %d %d %d > nfsft_benchomp_test.data", trafo_adjoint, N, M);
  fprintf(stderr, "%s\n", cmd);

  status = system(cmd);
  check_result_value(status, 0, "createdataset");
}
93 
/*
 * Creates nfsft_benchomp_test.result or truncates it to zero length.
 * A failure to open the file is silently ignored.
 */
void run_test_init_output()
{
  FILE *f = fopen("nfsft_benchomp_test.result", "w");

  if (f == NULL)
    return;

  fclose(f);
}
100 
/* Parameters that identify one benchmark configuration. */
typedef struct
{
  int trafo_adjoint; /* 0 is plotted as NFSFT, any other value as its transpose */
  int N, M;          /* forwarded to nfsft_benchomp_createdataset */
  int m;             /* first argument of the nfsft_benchomp_detail_* programs */
  int nfsft_flags;   /* tested against NFSFT_USE_DPT */
  int psi_flags;     /* tested against PRE_PSI, PRE_ONE_PSI, NFFT_SORT_NODES
                        and NFFT_OMP_BLOCKWISE_ADJOINT */
} s_param;
110 
/* Average, minimum and maximum of one measured value over all repetitions. */
typedef struct
{
  double avg, min, max;
} s_resval;
117 
118 typedef struct
119 {
120  int nthreads;
121  s_resval resval[6];
122 } s_result;
123 
124 typedef struct
125 {
126  s_param param;
127  s_result *results;
128  int nresults;
129 } s_testset;
130 
131 void run_test(s_resval *res, int nrepeat, int m, int nfsft_flags, int psi_flags, int nthreads)
132 {
133  FILE *f;
134  char cmd[1025];
135  int r,t;
136 
137  for (t = 0; t < 6; t++)
138  {
139  res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
140  }
141 
142  if (nthreads < 2)
143  snprintf(cmd, 1024, "./nfsft_benchomp_detail_single %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", m, nfsft_flags, psi_flags, nrepeat);
144  else
145  snprintf(cmd, 1024, "./nfsft_benchomp_detail_threads %d %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", m, nfsft_flags, psi_flags, nrepeat, nthreads);
146  fprintf(stderr, "%s\n", cmd);
147 
148  check_result_value(system(cmd), 0, cmd);
149 
150  f = fopen("nfsft_benchomp_test.out", "r");
151  for (r = 0; r < nrepeat; r++)
152  {
153  int retval;
154  double v[6];
155 // FILE *f;
156 // check_result_value(system(cmd), 0, cmd);
157 // f = fopen("nfsft_benchomp_test.out", "r");
158  retval = fscanf(f, "%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
159  check_result_value(retval, 6, "read nfsft_benchomp_test.out");
160 // fclose(f);
161 // fprintf(stderr, "%.3e %.3e %.3e %.3e %.3e %.3e\n", v[0], v[1], v[2], v[3], v[4], v[5]);
162  for (t = 0; t < 6; t++)
163  {
164  res[t].avg += v[t];
165  if (res[t].min > v[t])
166  res[t].min = v[t];
167  if (res[t].max < v[t])
168  res[t].max = v[t];
169  }
170  }
171  fclose(f);
172 
173  for (t = 0; t < 6; t++)
174  res[t].avg /= nrepeat;
175 
176  fprintf(stderr, "%d %d: ", nthreads, nrepeat);
177  for (t = 0; t < 6; t++)
178  fprintf(stderr, "%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
179  fprintf(stderr, "\n");
180 }
181 
182 const char *get_psi_string(int flags)
183 {
184  if (flags & PRE_PSI)
185  return "prepsi";
186  else if (flags & PRE_ONE_PSI)
187  return "unknownPSI";
188 
189  return "nopsi";
190 }
191 const char *get_sort_string(int flags)
192 {
193  if (flags & NFFT_SORT_NODES)
194  return "sorted";
195 
196  return "unsorted";
197 }
198 
199 const char *get_adjoint_omp_string(int flags)
200 {
201  if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
202  return "blockwise";
203 
204  return "";
205 }
206 
/*
 * Bit flags naming the benchmark parameters. They are combined into the
 * masks returned by determine_different_parameters() and consumed by
 * get_plot_title(). Bits 0 and 3 are not assigned.
 */
#define MASK_TA         (1U << 1) /* trafo_adjoint */
#define MASK_N          (1U << 2) /* N */
#define MASK_M          (1U << 4) /* M */
#define MASK_WINM       (1U << 5) /* m */
#define MASK_FLAGS_PSI  (1U << 6) /* PRE_ONE_PSI bits of psi_flags */
#define MASK_FLAGS_SORT (1U << 7) /* NFFT_SORT_NODES bit of psi_flags */
#define MASK_FLAGS_BW   (1U << 8) /* NFFT_OMP_BLOCKWISE_ADJOINT bit of psi_flags */
#define MASK_FLAGS_FPT  (1U << 9) /* NFSFT_USE_DPT bit of nfsft_flags */
215 
216 unsigned int determine_different_parameters(s_testset *testsets, int ntestsets)
217 {
218  int t;
219  unsigned int mask = 0;
220 
221  if (ntestsets < 2)
222  return 0;
223 
224  for (t = 1; t < ntestsets; t++)
225  {
226  if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
227  mask |= MASK_TA;
228  if (testsets[t-1].param.N != testsets[t].param.N)
229  mask |= MASK_N;
230  if (testsets[t-1].param.M != testsets[t].param.M)
231  mask |= MASK_M;
232  if (testsets[t-1].param.m != testsets[t].param.m)
233  mask |= MASK_WINM;
234  if ((testsets[t-1].param.psi_flags & PRE_ONE_PSI) != (testsets[t].param.psi_flags & PRE_ONE_PSI))
235  mask |= MASK_FLAGS_PSI;
236  if ((testsets[t-1].param.psi_flags & NFFT_SORT_NODES) != (testsets[t].param.psi_flags & NFFT_SORT_NODES))
237  mask |= MASK_FLAGS_SORT;
238  if ((testsets[t-1].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT))
239  mask |= MASK_FLAGS_BW;
240  if ((testsets[t-1].param.nfsft_flags & NFSFT_USE_DPT) != (testsets[t].param.nfsft_flags & NFSFT_USE_DPT))
241  mask |= MASK_FLAGS_FPT;
242  }
243 
244  return mask;
245 }
246 
247 void get_plot_title(char *outstr, int maxlen, char *hostname, s_param param, unsigned int diff_mask)
248 {
249  unsigned int mask = ~diff_mask;
250  int offset = 0;
251  int len;
252 
253  len = snprintf(outstr, maxlen, "%s", hostname);
254  if (len < 0 || len+offset >= maxlen-1) return;
255  offset += len;
256 
257  if (mask & MASK_TA)
258  {
259  len = snprintf(outstr+offset, maxlen-offset, " $\\mathrm{NFSFT}%s$", param.trafo_adjoint==0?"":"^\\top");
260  if (len < 0 || len+offset >= maxlen-1) return;
261  offset += len;
262  }
263 
264  if (mask & MASK_N)
265  {
266  len = snprintf(outstr+offset, maxlen-offset, " N=%d", param.N);
267  if (len < 0 || len+offset >= maxlen-1) return;
268  offset += len;
269  }
270 
271  if (mask & MASK_M)
272  {
273  len = snprintf(outstr+offset, maxlen-offset, " M=%d", param.M);
274  if (len < 0 || len+offset >= maxlen-1) return;
275  offset += len;
276  }
277 
278  if (mask & MASK_WINM)
279  {
280  len = snprintf(outstr+offset, maxlen-offset, " m=%d", param.m);
281  if (len < 0 || len+offset >= maxlen-1) return;
282  offset += len;
283  }
284 
285  if (mask & MASK_FLAGS_PSI)
286  {
287  len = snprintf(outstr+offset, maxlen-offset, " %s", get_psi_string(param.psi_flags));
288  if (len < 0 || len+offset >= maxlen-1) return;
289  offset += len;
290  }
291 
292  if (mask & MASK_FLAGS_SORT)
293  {
294  len = snprintf(outstr+offset, maxlen-offset, " %s", get_sort_string(param.psi_flags));
295  if (len < 0 || len+offset >= maxlen-1) return;
296  offset += len;
297  }
298 
299  if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.psi_flags)) > 0)
300  {
301  len = snprintf(outstr+offset, maxlen-offset, " %s", get_adjoint_omp_string(param.psi_flags));
302  if (len < 0 || len+offset >= maxlen-1) return;
303  offset += len;
304  }
305 
306  if (mask & MASK_FLAGS_FPT)
307  {
308  len = snprintf(outstr+offset, maxlen-offset, param.nfsft_flags & NFSFT_USE_DPT ? " DPT" : "");
309  if (len < 0 || len+offset >= maxlen-1) return;
310  offset += len;
311  }
312 
313 }
314 
315 void print_output_speedup_total_tref(FILE *out, s_testset *testsets, int ntestsets, int use_tref, double tref)
316 {
317  int i, t;
318  char hostname[1025];
319  char plottitle[1025];
320  unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
321 
322  if (gethostname(hostname, 1024) != 0)
323  strncpy(hostname, "unnamed", 1024);
324 
325  get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask);
326 
327  fprintf(out, "\\begin{tikzpicture}\n");
328  fprintf(out, "\\begin{axis}[");
329  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
330  fprintf(out, " title={%s}", plottitle);
331  fprintf(out, " ]\n");
332 
333  for (t = 0; t < ntestsets; t++)
334  {
335  s_testset testset = testsets[t];
336  fprintf(stderr, "%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
337  fprintf(stderr, "\n");
338 
339  fprintf(out, "\\addplot coordinates {");
340  for (i = 0; i < testset.nresults; i++)
341  if (use_tref == 1)
342  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
343  else
344  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
345  fprintf(out, "};\n");
346 
347  for (i = 0; i < testset.nresults; i++)
348  if (use_tref == 1)
349  fprintf(stderr, "%d:%.3f ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
350  else
351  fprintf(stderr, "%d:%.3f ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
352  fprintf(stderr, "\n\n");
353  }
354 
355  fprintf(out, "\\legend{{");
356  for (t = 0; t < ntestsets; t++)
357  {
358  char title[256];
359  if (t > 0)
360  fprintf(out, "},{");
361  get_plot_title(title, 255, "", testsets[t].param, ~(diff_mask));
362  fprintf(out, "%s", title);
363  }
364  fprintf(out, "}}\n");
365  fprintf(out, "\\end{axis}\n");
366  fprintf(out, "\\end{tikzpicture}\n");
367  fprintf(out, "\n\n");
368 
369  fflush(out);
370 }
371 
372 void print_output_speedup_total(FILE *out, s_testset *testsets, int ntestsets, int use_tref)
373 {
374  double tref = 1.0/0.0;
375  int t, k;
376 
377  if (use_tref == 1)
378  for (t = 0; t < ntestsets; t++)
379  for (k = 0; k < testsets[t].nresults; k++)
380  if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
381  tref = testsets[t].results[k].resval[5].avg;
382 
383  print_output_speedup_total_tref(out, testsets, ntestsets, use_tref, tref);
384 }
385 
386 void print_output_histo_PENRT(FILE *out, s_testset testset)
387 {
388  int i, size = testset.nresults;
389  char hostname[1025];
390 
391  if (gethostname(hostname, 1024) != 0)
392  strncpy(hostname, "unnamed", 1024);
393 
394  fprintf(out, "\\begin{tikzpicture}\n");
395  fprintf(out, "\\begin{axis}[");
396  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
397  fprintf(out, "symbolic x coords={");
398  for (i = 0; i < size; i++)
399  if (i > 0)
400  fprintf(out, ",%d", testset.results[i].nthreads);
401  else
402  fprintf(out, "%d", testset.results[i].nthreads);
403 
404  fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
405  fprintf(out, " title={%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
406  fprintf(out, " ]\n");
407  fprintf(out, "\\addplot coordinates {");
408  for (i = 0; i < size; i++)
409  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
410  fprintf(out, "};\n");
411 
412  fprintf(out, "\\addplot coordinates {");
413  for (i = 0; i < size; i++)
414  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
415  fprintf(out, "};\n");
416 
417  fprintf(out, "\\addplot coordinates {");
418  for (i = 0; i < size; i++)
419  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
420  fprintf(out, "};\n");
421 
422  fprintf(out, "\\addplot coordinates {");
423  for (i = 0; i < size; i++)
424  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
425  fprintf(out, "};\n");
426 
427  fprintf(out, "\\addplot coordinates {");
428  for (i = 0; i < size; i++)
429  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
430  fprintf(out, "};\n");
431  fprintf(out, "\\legend{%s,%s,$\\mathrm{NFFT}%s$,rest,total}\n", testset.param.nfsft_flags & NFSFT_USE_DPT ? "DPT" : "FPT", testset.param.trafo_adjoint==0?"c2e":"$\\mathrm{c2e}^\\top$", testset.param.trafo_adjoint==0?"":"^\\top");
432  fprintf(out, "\\end{axis}\n");
433  fprintf(out, "\\end{tikzpicture}\n");
434  fprintf(out, "\n\n");
435 
436  fflush(out);
437 }
438 
439 void run_testset(s_testset *testset, int trafo_adjoint, int N, int M, int m, int nfsft_flags, int psi_flags, int *nthreads_array, int n_threads_array_size)
440 {
441  int i;
442  testset->param.trafo_adjoint = trafo_adjoint;
443  testset->param.N = N;
444  testset->param.M = M;
445  testset->param.m = m;
446  testset->param.nfsft_flags = nfsft_flags;
447  testset->param.psi_flags = psi_flags;
448 
449  testset->results = (s_result*) malloc(n_threads_array_size*sizeof(s_result));
450  testset->nresults = n_threads_array_size;
451 
452  run_test_create(testset->param.trafo_adjoint, testset->param.N, testset->param.M);
453  for (i = 0; i < n_threads_array_size; i++)
454  {
455  testset->results[i].nthreads = nthreads_array[i];
456  run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.nfsft_flags, testset->param.psi_flags, testset->results[i].nthreads = nthreads_array[i]);
457  }
458 
459 }
460 
461 void test1(int *nthreads_array, int n_threads_array_size, int m)
462 {
463  s_testset testsets[4];
464 
465  run_testset(&testsets[0], 0, 1024, 1000000, m, 0, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
466 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
467  print_output_histo_PENRT(file_out_tex, testsets[0]);
468 #endif
469 
470  run_testset(&testsets[1], 1, 1024, 1000000, m, 0, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
471 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
472  print_output_histo_PENRT(file_out_tex, testsets[1]);
473 #endif
474 
475  print_output_speedup_total(file_out_tex, testsets, 2, 0);
476 
477  run_testset(&testsets[2], 0, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
478 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
479  print_output_histo_PENRT(file_out_tex, testsets[2]);
480 #endif
481 
482  run_testset(&testsets[3], 1, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
483 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
484  print_output_histo_PENRT(file_out_tex, testsets[3]);
485 #endif
486 
487  print_output_speedup_total(file_out_tex, testsets+2, 2, 0);
488 }
489 
490 int main(int argc, char** argv)
491 {
492  int *nthreads_array;
493  int n_threads_array_size = get_nthreads_array(&nthreads_array);
494  int k;
495 
496 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
497  fprintf(stderr, "WARNING: Detailed time measurements for NFSFT are not activated.\n");
498  fprintf(stderr, "For more detailed plots, please re-run the configure script with options\n");
499  fprintf(stderr, "--enable-measure-time --enable-measure-time-fftw --enable-nfsft --enable-openmp\n");
500  fprintf(stderr, "and run \"make clean all\"\n\n");
501 #endif
502 
503  for (k = 0; k < n_threads_array_size; k++)
504  fprintf(stderr, "%d ", nthreads_array[k]);
505  fprintf(stderr, "\n");
506 
507  file_out_tex = fopen("nfsft_benchomp_results_plots.tex", "w");
508 
509  test1(nthreads_array, n_threads_array_size, 2);
510  test1(nthreads_array, n_threads_array_size, 4);
511  test1(nthreads_array, n_threads_array_size, 6);
512  test1(nthreads_array, n_threads_array_size, 8);
513 
514  fclose(file_out_tex);
515 
516  return 0;
517 }

Generated on Tue Apr 30 2013 by Doxygen 1.8.1