/* Stream the plot output is written to. It starts as NULL; main() points it
   at "nfsft_benchomp_results_plots.tex" before any test runs. */
30 static FILE* file_out_tex = NULL;
/*
 * Build the list of thread counts to benchmark and hand it back through *arr.
 *
 * arr: receives a malloc()ed int array; nothing visible in this function
 *      frees it.
 *
 * NOTE(review): the local declarations, the test that chooses between the
 * two allocation paths below, the ret_number updates and the return
 * statements are not visible in this excerpt. The return value is presumably
 * the number of entries written to *arr -- confirm against the full source.
 * NOTE(review): neither malloc() result is checked before it is written to.
 */
32 int get_nthreads_array(
int **arr)
/* Upper bound on the thread counts, taken from nfft_get_omp_num_threads(). */
34 int max_threads = nfft_get_omp_num_threads();
/* Despite the "pw2" name, this is 1 when max_threads is even and 0 otherwise. */
38 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
/* First path: one slot per value of k in 0..max_threads-1 (the loop body is
   not visible here). */
42 *arr = (
int*) malloc(max_threads*
sizeof(
int));
43 for (k = 0; k < max_threads; k++)
/* Second path: this empty-bodied loop bumps alloc_num once for every power of
   two that does not exceed max_threads, sizing the allocation that follows. */
48 for (k = 1; k <= max_threads; k*=2, alloc_num++);
50 *arr = (
int*) malloc(alloc_num*
sizeof(
int));
/* Visit the powers of two up to max_threads. */
52 for (k = 1; k <= max_threads; k*=2)
/* k is the largest power of two and lies strictly below max_threads, and
   max_threads is even: store max_threads/2 in the current ret_number slot. */
54 if (k != max_threads && 2*k > max_threads && max_threads_pw2)
56 *(*arr + ret_number) = max_threads/2;
/* Store the power of two itself. */
60 *(*arr + ret_number) = k;
/* Same "largest power of two, strictly below max_threads" test: store
   max_threads as well. */
63 if (k != max_threads && 2*k > max_threads)
65 *(*arr + ret_number) = max_threads;
/*
 * Report a mismatch between an observed value and the expected one.
 *
 * val: value that was obtained (callers pass system() and fscanf() results).
 * ok:  value that was expected.
 * msg: label included in the diagnostic.
 *
 * NOTE(review): the guard around the message and whatever follows it are not
 * visible in this excerpt -- presumably the message is printed only when
 * val != ok; confirm against the full source.
 */
75 void check_result_value(
const int val,
const int ok,
const char *msg)
/* The diagnostic goes to stderr as "ERROR <msg>: <val> not <ok>". */
79 fprintf(stderr,
"ERROR %s: %d not %d\n", msg, val, ok);
/*
 * Generate the benchmark input file by running the external data set
 * generator through the shell.
 *
 * trafo_adjoint, N, M: passed unchanged as the three command-line arguments
 *                      of ./nfsft_benchomp_createdataset.
 *
 * The generator's standard output is redirected to nfsft_benchomp_test.data.
 * The command line is echoed to stderr before it runs, and its exit status is
 * handed to check_result_value() with 0 as the expected value.
 */
void run_test_create(int trafo_adjoint, int N, int M)
{
  char command[1025];
  int status;

  snprintf(command, 1024,
           "./nfsft_benchomp_createdataset %d %d %d > nfsft_benchomp_test.data",
           trafo_adjoint, N, M);

  /* Echo the exact command so a failing run can be reproduced by hand. */
  fputs(command, stderr);
  fputc('\n', stderr);

  status = system(command);
  check_result_value(status, 0, "createdataset");
}
/*
 * Open "nfsft_benchomp_test.result" in write mode, which creates the file or
 * truncates an existing one.
 *
 * NOTE(review): what happens to the stream afterwards (NULL check, fclose) is
 * not visible in this excerpt -- confirm that the handle is closed.
 */
94 void run_test_init_output()
96 FILE *f = fopen(
"nfsft_benchomp_test.result",
"w");
/*
 * Run one benchmark configuration through an external helper program and
 * collect average/minimum/maximum statistics for six timing columns.
 *
 * res:         array of at least six s_resval entries; avg, min and max of
 *              each are overwritten.
 * nrepeat:     number of result rows read back; also the divisor for avg.
 * m, nfsft_flags, psi_flags:
 *              forwarded unchanged on the helper's command line.
 * nthreads:    forwarded to the "threads" helper and echoed in the summary.
 *
 * The helper reads nfsft_benchomp_test.data on stdin and writes
 * nfsft_benchomp_test.out, which is then parsed here.
 *
 * NOTE(review): local declarations, the condition that picks between the
 * "single" and "threads" command lines, the statements that update avg/min/max
 * inside the loop, and any fclose() are not visible in this excerpt.
 */
131 void run_test(
s_resval *res,
int nrepeat,
int m,
int nfsft_flags,
int psi_flags,
int nthreads)
/* Reset the accumulators; min starts at 1.0/0.0 so any sample compares lower. */
137 for (t = 0; t < 6; t++)
139 res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
/* Two alternative command lines; the second one additionally passes nthreads.
   The selecting condition is not visible here. */
143 snprintf(cmd, 1024,
"./nfsft_benchomp_detail_single %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", m, nfsft_flags, psi_flags, nrepeat);
145 snprintf(cmd, 1024,
"./nfsft_benchomp_detail_threads %d %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", m, nfsft_flags, psi_flags, nrepeat, nthreads);
/* Echo the command, then run it and require an exit status of 0. */
146 fprintf(stderr,
"%s\n", cmd);
148 check_result_value(system(cmd), 0, cmd);
/* NOTE(review): no NULL check on f is visible before it is passed to fscanf(). */
150 f = fopen(
"nfsft_benchomp_test.out",
"r");
/* One row of six floating-point values is expected per repetition. */
151 for (r = 0; r < nrepeat; r++)
158 retval = fscanf(f,
"%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
/* fscanf() must have converted all six fields. */
159 check_result_value(retval, 6,
"read nfsft_benchomp_test.out");
/* Per-column comparison of the new sample against the running min and max. */
162 for (t = 0; t < 6; t++)
165 if (res[t].min > v[t])
167 if (res[t].max < v[t])
/* Divide each avg by the repetition count. */
173 for (t = 0; t < 6; t++)
174 res[t].avg /= nrepeat;
/* Summary line on stderr: "<nthreads> <nrepeat>: " followed by avg/min/max
   triples for the six columns. */
176 fprintf(stderr,
"%d %d: ", nthreads, nrepeat);
177 for (t = 0; t < 6; t++)
178 fprintf(stderr,
"%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
179 fprintf(stderr,
"\n");
/*
 * Return a string chosen from the bits set in flags; callers insert it into
 * plot titles.
 *
 * NOTE(review): only the PRE_ONE_PSI test is visible. The branch before it
 * (the visible line starts with "else") and every returned string are missing
 * from this excerpt, so the actual labels cannot be stated here.
 */
182 const char *get_psi_string(
int flags)
186 else if (flags & PRE_ONE_PSI)
/*
 * Return a string that depends on whether NFFT_SORT_NODES is set in flags;
 * callers insert it into plot titles.
 *
 * NOTE(review): the returned strings are not visible in this excerpt.
 */
191 const char *get_sort_string(
int flags)
193 if (flags & NFFT_SORT_NODES)
/*
 * Return a string that depends on whether NFFT_OMP_BLOCKWISE_ADJOINT is set
 * in flags. get_plot_title() skips the entry when the result is empty.
 *
 * NOTE(review): the returned strings are not visible in this excerpt.
 */
199 const char *get_adjoint_omp_string(
int flags)
201 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
/* Bit flags naming the individual test parameters. They are combined into the
   mask built by determine_different_parameters() and tested by
   get_plot_title(). Bits 0 and 3 are not assigned. */
207 #define MASK_TA (1U<<1)
208 #define MASK_N (1U<<2)
209 #define MASK_M (1U<<4)
210 #define MASK_WINM (1U<<5)
211 #define MASK_FLAGS_PSI (1U<<6)
212 #define MASK_FLAGS_SORT (1U<<7)
213 #define MASK_FLAGS_BW (1U<<8)
214 #define MASK_FLAGS_FPT (1U<<9)
216 unsigned int determine_different_parameters(
s_testset *testsets,
int ntestsets)
219 unsigned int mask = 0;
224 for (t = 1; t < ntestsets; t++)
226 if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
228 if (testsets[t-1].param.N != testsets[t].param.N)
230 if (testsets[t-1].param.M != testsets[t].param.M)
232 if (testsets[t-1].param.m != testsets[t].param.m)
234 if ((testsets[t-1].param.psi_flags & PRE_ONE_PSI) != (testsets[t].param.psi_flags & PRE_ONE_PSI))
235 mask |= MASK_FLAGS_PSI;
236 if ((testsets[t-1].param.psi_flags & NFFT_SORT_NODES) != (testsets[t].param.psi_flags & NFFT_SORT_NODES))
237 mask |= MASK_FLAGS_SORT;
238 if ((testsets[t-1].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT))
239 mask |= MASK_FLAGS_BW;
240 if ((testsets[t-1].param.nfsft_flags & NFSFT_USE_DPT) != (testsets[t].param.nfsft_flags & NFSFT_USE_DPT))
241 mask |= MASK_FLAGS_FPT;
247 void get_plot_title(
char *outstr,
int maxlen,
char *hostname,
s_param param,
unsigned int diff_mask)
249 unsigned int mask = ~diff_mask;
253 len = snprintf(outstr, maxlen,
"%s", hostname);
254 if (len < 0 || len+offset >= maxlen-1)
return;
259 len = snprintf(outstr+offset, maxlen-offset,
" $\\mathrm{NFSFT}%s$", param.trafo_adjoint==0?
"":
"^\\top");
260 if (len < 0 || len+offset >= maxlen-1)
return;
266 len = snprintf(outstr+offset, maxlen-offset,
" N=%d", param.N);
267 if (len < 0 || len+offset >= maxlen-1)
return;
273 len = snprintf(outstr+offset, maxlen-offset,
" M=%d", param.M);
274 if (len < 0 || len+offset >= maxlen-1)
return;
278 if (mask & MASK_WINM)
280 len = snprintf(outstr+offset, maxlen-offset,
" m=%d", param.m);
281 if (len < 0 || len+offset >= maxlen-1)
return;
285 if (mask & MASK_FLAGS_PSI)
287 len = snprintf(outstr+offset, maxlen-offset,
" %s", get_psi_string(param.psi_flags));
288 if (len < 0 || len+offset >= maxlen-1)
return;
292 if (mask & MASK_FLAGS_SORT)
294 len = snprintf(outstr+offset, maxlen-offset,
" %s", get_sort_string(param.psi_flags));
295 if (len < 0 || len+offset >= maxlen-1)
return;
299 if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.psi_flags)) > 0)
301 len = snprintf(outstr+offset, maxlen-offset,
" %s", get_adjoint_omp_string(param.psi_flags));
302 if (len < 0 || len+offset >= maxlen-1)
return;
306 if (mask & MASK_FLAGS_FPT)
308 len = snprintf(outstr+offset, maxlen-offset, param.nfsft_flags & NFSFT_USE_DPT ?
" DPT" :
"");
309 if (len < 0 || len+offset >= maxlen-1)
return;
/*
 * Write one TikZ/pgfplots speedup diagram for a group of test sets to out.
 *
 * out:       stream receiving the LaTeX code.
 * testsets:  test sets to plot, one \addplot each.
 * ntestsets: number of entries in testsets.
 * use_tref:  presumably selects between the two speedup formulas below --
 *            the branching itself is not visible in this excerpt.
 * tref:      reference time used by the tref-based formula.
 *
 * Speedups are computed from resval[5].avg of each result. The same numbers
 * are also echoed to stderr in a compact "<nthreads>:<speedup>" form.
 *
 * NOTE(review): local declarations, the per-iteration "testset" variable, the
 * conditions choosing between the paired fprintf() calls, and any separator
 * written between legend entries are not visible in this excerpt.
 */
315 void print_output_speedup_total_tref(FILE *out,
s_testset *testsets,
int ntestsets,
int use_tref,
double tref)
319 char plottitle[1025];
/* Parameters that vary between the test sets are kept out of the common title. */
320 unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
/* Fall back to a fixed name when the host name cannot be obtained. */
322 if (gethostname(hostname, 1024) != 0)
323 strncpy(hostname,
"unnamed", 1024);
/* The title is built from the first test set; get_plot_title() omits every
   parameter whose bit is set in diff_mask. */
325 get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask);
327 fprintf(out,
"\\begin{tikzpicture}\n");
328 fprintf(out,
"\\begin{axis}[");
329 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
330 fprintf(out,
" title={%s}", plottitle);
331 fprintf(out,
" ]\n");
/* One data series per test set. */
333 for (t = 0; t < ntestsets; t++)
/* Full parameter description on stderr; note that the format string ends with
   a '}' that has no opening counterpart. */
336 fprintf(stderr,
"%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?
"":
"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
337 fprintf(stderr,
"\n");
339 fprintf(out,
"\\addplot coordinates {");
/* Two alternative formulas per point: tref divided by this result's time, or
   the first result's time divided by this result's time. */
340 for (i = 0; i < testset.nresults; i++)
342 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
344 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
345 fprintf(out,
"};\n");
/* The same speedups again, this time to stderr. */
347 for (i = 0; i < testset.nresults; i++)
349 fprintf(stderr,
"%d:%.3f ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
351 fprintf(stderr,
"%d:%.3f ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
352 fprintf(stderr,
"\n\n");
/* Legend: one entry per test set. The inverted mask makes get_plot_title()
   omit the parameters that do not vary, and the empty host name drops the
   host from each entry. */
355 fprintf(out,
"\\legend{{");
356 for (t = 0; t < ntestsets; t++)
361 get_plot_title(title, 255,
"", testsets[t].param, ~(diff_mask));
362 fprintf(out,
"%s", title);
364 fprintf(out,
"}}\n");
365 fprintf(out,
"\\end{axis}\n");
366 fprintf(out,
"\\end{tikzpicture}\n");
367 fprintf(out,
"\n\n");
/*
 * Determine a reference time and emit the speedup plot for a group of test
 * sets via print_output_speedup_total_tref().
 *
 * out, testsets, ntestsets, use_tref: forwarded unchanged.
 *
 * The reference time is the smallest resval[5].avg among all results that
 * were measured with exactly one thread. It stays at 1.0/0.0 when no such
 * result exists.
 *
 * NOTE(review): the lines between the tref initialisation and the search loop
 * are not visible in this excerpt, so the loop may be guarded by a condition
 * (for example on use_tref) -- confirm against the full source.
 */
372 void print_output_speedup_total(FILE *out,
s_testset *testsets,
int ntestsets,
int use_tref)
/* Start so high that any measured time compares lower. */
374 double tref = 1.0/0.0;
378 for (t = 0; t < ntestsets; t++)
379 for (k = 0; k < testsets[t].nresults; k++)
380 if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
381 tref = testsets[t].results[k].resval[5].avg;
383 print_output_speedup_total_tref(out, testsets, ntestsets, use_tref, tref);
/*
 * Write one TikZ/pgfplots bar chart with the timing breakdown of a single
 * test set to out.
 *
 * out:     stream receiving the LaTeX code.
 * testset: test set to plot (passed by value).
 *
 * Five series are written, each with one bar per thread count. In \addplot
 * order they are resval[1], resval[2], resval[3], resval[0] + resval[4] and
 * resval[5], all using the avg field. The \legend entries at the end are
 * listed in that same order.
 *
 * NOTE(review): the hostname buffer declaration and the condition choosing
 * between the ",%d" and "%d" forms in the x-coordinate list are not visible
 * in this excerpt. Presumably the comma form is used for every element but
 * the first.
 */
386 void print_output_histo_PENRT(FILE *out,
s_testset testset)
388 int i, size = testset.nresults;
/* Fall back to a fixed name when the host name cannot be obtained. */
391 if (gethostname(hostname, 1024) != 0)
392 strncpy(hostname,
"unnamed", 1024);
394 fprintf(out,
"\\begin{tikzpicture}\n");
395 fprintf(out,
"\\begin{axis}[");
396 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
/* The thread counts serve as symbolic x coordinates. */
397 fprintf(out,
"symbolic x coords={");
398 for (i = 0; i < size; i++)
400 fprintf(out,
",%d", testset.results[i].nthreads);
402 fprintf(out,
"%d", testset.results[i].nthreads);
404 fprintf(out,
"}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
/* The title spells out every parameter of the test set. */
405 fprintf(out,
" title={%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?
"":
"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
406 fprintf(out,
" ]\n");
/* Series 1: resval[1]. */
407 fprintf(out,
"\\addplot coordinates {");
408 for (i = 0; i < size; i++)
409 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
410 fprintf(out,
"};\n");
/* Series 2: resval[2]. */
412 fprintf(out,
"\\addplot coordinates {");
413 for (i = 0; i < size; i++)
414 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
415 fprintf(out,
"};\n");
/* Series 3: resval[3]. */
417 fprintf(out,
"\\addplot coordinates {");
418 for (i = 0; i < size; i++)
419 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
420 fprintf(out,
"};\n");
/* Series 4: sum of resval[0] and resval[4]. */
422 fprintf(out,
"\\addplot coordinates {");
423 for (i = 0; i < size; i++)
424 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
425 fprintf(out,
"};\n");
/* Series 5: resval[5]. */
427 fprintf(out,
"\\addplot coordinates {");
428 for (i = 0; i < size; i++)
429 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
430 fprintf(out,
"};\n");
/* Legend: "DPT" or "FPT" depending on NFSFT_USE_DPT, then c2e, NFFT, rest and
   total. The c2e and NFFT entries carry a transpose mark for the adjoint. */
431 fprintf(out,
"\\legend{%s,%s,$\\mathrm{NFFT}%s$,rest,total}\n", testset.param.nfsft_flags & NFSFT_USE_DPT ?
"DPT" :
"FPT", testset.param.trafo_adjoint==0?
"c2e":
"$\\mathrm{c2e}^\\top$", testset.param.trafo_adjoint==0?
"":
"^\\top");
432 fprintf(out,
"\\end{axis}\n");
433 fprintf(out,
"\\end{tikzpicture}\n");
434 fprintf(out,
"\n\n");
439 void run_testset(
s_testset *testset,
int trafo_adjoint,
int N,
int M,
int m,
int nfsft_flags,
int psi_flags,
int *nthreads_array,
int n_threads_array_size)
442 testset->param.trafo_adjoint = trafo_adjoint;
443 testset->param.N = N;
444 testset->param.M = M;
445 testset->param.m = m;
446 testset->param.nfsft_flags = nfsft_flags;
447 testset->param.psi_flags = psi_flags;
449 testset->results = (
s_result*) malloc(n_threads_array_size*
sizeof(
s_result));
450 testset->nresults = n_threads_array_size;
452 run_test_create(testset->param.trafo_adjoint, testset->param.N, testset->param.M);
453 for (i = 0; i < n_threads_array_size; i++)
455 testset->results[i].nthreads = nthreads_array[i];
456 run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.nfsft_flags, testset->param.psi_flags, testset->results[i].nthreads = nthreads_array[i]);
461 void test1(
int *nthreads_array,
int n_threads_array_size,
int m)
465 run_testset(&testsets[0], 0, 1024, 1000000, m, 0, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
466 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
467 print_output_histo_PENRT(file_out_tex, testsets[0]);
470 run_testset(&testsets[1], 1, 1024, 1000000, m, 0, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
471 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
472 print_output_histo_PENRT(file_out_tex, testsets[1]);
475 print_output_speedup_total(file_out_tex, testsets, 2, 0);
477 run_testset(&testsets[2], 0, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
478 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
479 print_output_histo_PENRT(file_out_tex, testsets[2]);
482 run_testset(&testsets[3], 1, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
483 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
484 print_output_histo_PENRT(file_out_tex, testsets[3]);
487 print_output_speedup_total(file_out_tex, testsets+2, 2, 0);
490 int main(
int argc,
char** argv)
493 int n_threads_array_size = get_nthreads_array(&nthreads_array);
496 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
497 fprintf(stderr,
"WARNING: Detailed time measurements for NFSFT are not activated.\n");
498 fprintf(stderr,
"For more detailed plots, please re-run the configure script with options\n");
499 fprintf(stderr,
"--enable-measure-time --enable-measure-time-fftw --enable-nfsft --enable-openmp\n");
500 fprintf(stderr,
"and run \"make clean all\"\n\n");
503 for (k = 0; k < n_threads_array_size; k++)
504 fprintf(stderr,
"%d ", nthreads_array[k]);
505 fprintf(stderr,
"\n");
507 file_out_tex = fopen(
"nfsft_benchomp_results_plots.tex",
"w");
509 test1(nthreads_array, n_threads_array_size, 2);
510 test1(nthreads_array, n_threads_array_size, 4);
511 test1(nthreads_array, n_threads_array_size, 6);
512 test1(nthreads_array, n_threads_array_size, 8);
514 fclose(file_out_tex);