/* Stream for the generated TeX/pgfplots output. It is assigned from fopen()
 * in main() and handed to the print_output_* routines by test1(). */
30 static FILE* file_out_tex = NULL;
/*
 * Builds the list of thread counts to benchmark.
 *
 * arr: out-parameter; receives a malloc'ed int array. main() uses the return
 *      value of this function as the number of entries in *arr.
 *
 * Two allocations are visible: one array with max_threads entries, and one
 * sized by alloc_num, which is advanced once per doubling k = 1, 2, 4, ...
 * up to max_threads.
 * NOTE(review): the condition choosing between the two paths, the values
 * stored by the first loop and the return statements are not visible here --
 * confirm against the full function.
 * NOTE(review): neither malloc() result is checked before it is written to.
 */
32 int get_nthreads_array(
int **arr)
34 int max_threads = nfft_get_omp_num_threads();
/* Despite the "pw2" name, this expression is 1 exactly when max_threads is
 * even: integer halving followed by doubling reproduces the value. */
38 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
42 *arr = (
int*) malloc(max_threads*
sizeof(
int));
43 for (k = 0; k < max_threads; k++)
/* Empty-bodied loop: bumps alloc_num once per power of two <= max_threads. */
48 for (k = 1; k <= max_threads; k*=2, alloc_num++);
50 *arr = (
int*) malloc(alloc_num*
sizeof(
int));
/* Walk the powers of two that do not exceed max_threads. */
52 for (k = 1; k <= max_threads; k*=2)
/* k is the last power of two in range, differs from max_threads, and
 * max_threads is even: max_threads/2 is stored as well. */
54 if (k != max_threads && 2*k > max_threads && max_threads_pw2)
56 *(*arr + ret_number) = max_threads/2;
60 *(*arr + ret_number) = k;
/* Same "last power of two" case: max_threads itself is stored too. */
63 if (k != max_threads && 2*k > max_threads)
65 *(*arr + ret_number) = max_threads;
/*
 * Reports a result check on stderr in the form "ERROR <msg>: <val> not <ok>".
 *
 * val: value that was obtained (callers pass system() and fscanf() results).
 * ok:  value that was expected.
 * msg: text identifying the checked operation.
 *
 * NOTE(review): presumably the message is only emitted when val != ok; the
 * guarding condition and any follow-up action are not visible here.
 */
75 void check_result_value(
const int val,
const int ok,
const char *msg)
79 fprintf(stderr,
"ERROR %s: %d not %d\n", msg, val, ok);
/*
 * Creates the benchmark input file nfft_benchomp_test.data by running
 * ./nfft_benchomp_createdataset through system().
 *
 * d, trafo_adjoint, N, M and sigma are forwarded on the command line. The
 * four command variants repeat N one, two, three or four times.
 * NOTE(review): presumably the variant is selected from d (one N per
 * dimension); the selecting conditions are not visible here.
 *
 * Each command is formatted into cmd with a 1024-byte limit, echoed to
 * stderr, and its system() status is compared against 0 through
 * check_result_value().
 */
85 void run_test_create(
int d,
int trafo_adjoint,
int N,
int M,
double sigma)
90 snprintf(cmd, 1024,
"./nfft_benchomp_createdataset %d %d %d %d %lg > nfft_benchomp_test.data", d, trafo_adjoint, N, M, sigma);
92 snprintf(cmd, 1024,
"./nfft_benchomp_createdataset %d %d %d %d %d %lg > nfft_benchomp_test.data", d, trafo_adjoint, N, N, M, sigma);
94 snprintf(cmd, 1024,
"./nfft_benchomp_createdataset %d %d %d %d %d %d %lg > nfft_benchomp_test.data", d, trafo_adjoint, N, N, N, M, sigma);
96 snprintf(cmd, 1024,
"./nfft_benchomp_createdataset %d %d %d %d %d %d %d %lg > nfft_benchomp_test.data", d, trafo_adjoint, N, N, N, N, M, sigma);
99 fprintf(stderr,
"%s\n", cmd);
100 check_result_value(system(cmd), 0,
"createdataset");
/*
 * Opens nfft_benchomp_test.result with mode "w", which creates the file or
 * truncates an existing one.
 * NOTE(review): what is written to the stream, and whether the fopen()
 * result is checked or the stream closed, is not visible here.
 */
103 void run_test_init_output()
105 FILE *f = fopen(
"nfft_benchomp_test.result",
"w");
/*
 * Runs one benchmark configuration repeatedly and aggregates the six timing
 * values that each run leaves in nfft_benchomp_test.out.
 *
 * res:      array of at least 6 s_resval entries; avg/min/max are reset here
 *           and carry the aggregated values afterwards.
 * nrepeat:  number of measured runs; also the divisor for the averages, so it
 *           must not be 0.
 * m, flags: forwarded on the benchmark command line.
 * nthreads: forwarded to ./nfft_benchomp_detail_threads and echoed in the
 *           summary line on stderr.
 *
 * NOTE(review): the condition selecting the "_single" versus the "_threads"
 * executable, and the statements that accumulate avg/min/max, are not
 * visible here.
 */
141 void run_test(
s_resval *res,
int nrepeat,
int m,
int flags,
int nthreads)
146 for (t = 0; t < 6; t++)
/* min starts at 1.0/0.0 (+infinity under IEEE 754 arithmetic) so that the
 * comparison res[t].min > v[t] below holds for any finite measurement. */
148 res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
152 snprintf(cmd, 1024,
"./nfft_benchomp_detail_single %d %d < nfft_benchomp_test.data > nfft_benchomp_test.out", m, flags);
154 snprintf(cmd, 1024,
"./nfft_benchomp_detail_threads %d %d %d < nfft_benchomp_test.data > nfft_benchomp_test.out", m, flags, nthreads);
155 fprintf(stderr,
"%s\n", cmd);
/* The command is executed once before the repeat loop; its output file is
 * not read at this point. NOTE(review): looks like a warm-up run -- confirm. */
156 check_result_value(system(cmd), 0, cmd);
158 for (r = 0; r < nrepeat; r++)
163 check_result_value(system(cmd), 0, cmd);
/* NOTE(review): f is passed to fscanf() without a visible NULL check. */
164 f = fopen(
"nfft_benchomp_test.out",
"r");
165 retval = fscanf(f,
"%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
/* All six values must have been converted. */
166 check_result_value(retval, 6,
"read nfft_benchomp_test.out");
169 for (t = 0; t < 6; t++)
172 if (res[t].min > v[t])
174 if (res[t].max < v[t])
/* Turn the accumulated values into means over the nrepeat runs. */
179 for (t = 0; t < 6; t++)
180 res[t].avg /= nrepeat;
/* Summary line: thread count, repeat count, then avg/min/max per value. */
182 fprintf(stderr,
"%d %d: ", nthreads, nrepeat);
183 for (t = 0; t < 6; t++)
184 fprintf(stderr,
"%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
185 fprintf(stderr,
"\n");
/*
 * Returns a text label that depends on whether the PRE_ONE_PSI bit is set in
 * flags; the label is inserted into plot titles.
 * NOTE(review): the returned literals are not visible here.
 */
188 const char *get_psi_string(
int flags)
190 if (flags & PRE_ONE_PSI)
/*
 * Returns a text label that depends on whether the NFFT_SORT_NODES bit is
 * set in flags; the label is inserted into plot titles.
 * NOTE(review): the returned literals are not visible here.
 */
195 const char *get_sort_string(
int flags)
197 if (flags & NFFT_SORT_NODES)
/*
 * Returns a text label that depends on whether the
 * NFFT_OMP_BLOCKWISE_ADJOINT bit is set in flags.
 * get_plot_title() tests the result with strlen(...) > 0 before printing it.
 * NOTE(review): that suggests one of the results is the empty string; the
 * returned literals are not visible here -- confirm.
 */
203 const char *get_adjoint_omp_string(
int flags)
205 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
/*
 * Bit flags, one per benchmark parameter, forming the "difference mask".
 * determine_different_parameters() sets a bit when the corresponding
 * parameter differs between test sets, and get_plot_title() consults the
 * mask to decide which parameters to print.
 * The three MASK_FLAGS_* bits correspond to the PRE_ONE_PSI, NFFT_SORT_NODES
 * and NFFT_OMP_BLOCKWISE_ADJOINT bits of param.flags; MASK_SIGMA and
 * MASK_WINM guard the sigma and m fields in get_plot_title().
 * NOTE(review): MASK_D, MASK_TA, MASK_N and MASK_M presumably stand for
 * param.d, param.trafo_adjoint, param.N and param.M -- confirm at use sites.
 */
211 #define MASK_D (1U<<0)
212 #define MASK_TA (1U<<1)
213 #define MASK_N (1U<<2)
214 #define MASK_SIGMA (1U<<3)
215 #define MASK_M (1U<<4)
216 #define MASK_WINM (1U<<5)
217 #define MASK_FLAGS_PSI (1U<<6)
218 #define MASK_FLAGS_SORT (1U<<7)
219 #define MASK_FLAGS_BW (1U<<8)
/*
 * Compares every test set with its predecessor and collects, as MASK_* bits,
 * the parameters that are not the same in all ntestsets entries.
 *
 * testsets:  array of ntestsets test sets; only their .param member is read.
 * ntestsets: number of entries; with fewer than two entries the loop body
 *            never runs and mask keeps its initial value 0.
 *
 * The flags member is compared bit by bit (PRE_ONE_PSI, NFFT_SORT_NODES,
 * NFFT_OMP_BLOCKWISE_ADJOINT), each with its own MASK_FLAGS_* bit.
 * NOTE(review): the mask updates for d, trafo_adjoint, N, sigma, M and m and
 * the return statement are not visible here; presumably they set MASK_D,
 * MASK_TA, MASK_N, MASK_SIGMA, MASK_M and MASK_WINM and the function returns
 * mask (callers use the result as a difference mask).
 */
221 unsigned int determine_different_parameters(
s_testset *testsets,
int ntestsets)
224 unsigned int mask = 0;
229 for (t = 1; t < ntestsets; t++)
231 if (testsets[t-1].param.d != testsets[t].param.d)
233 if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
235 if (testsets[t-1].param.N != testsets[t].param.N)
237 if (testsets[t-1].param.sigma != testsets[t].param.sigma)
239 if (testsets[t-1].param.M != testsets[t].param.M)
241 if (testsets[t-1].param.m != testsets[t].param.m)
243 if ((testsets[t-1].param.flags & PRE_ONE_PSI) != (testsets[t].param.flags & PRE_ONE_PSI))
244 mask |= MASK_FLAGS_PSI;
245 if ((testsets[t-1].param.flags & NFFT_SORT_NODES) != (testsets[t].param.flags & NFFT_SORT_NODES))
246 mask |= MASK_FLAGS_SORT;
247 if ((testsets[t-1].param.flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.flags & NFFT_OMP_BLOCKWISE_ADJOINT))
248 mask |= MASK_FLAGS_BW;
/*
 * Assembles a plot title or legend label in outstr.
 *
 * outstr:    destination buffer; every write goes through snprintf() bounded
 *            by the remaining space (maxlen - offset).
 * maxlen:    capacity limit passed to snprintf().
 * hostname:  text placed at the very beginning of the title (callers also
 *            pass "" to get a label without host name).
 * param:     parameter set whose values are printed.
 * diff_mask: MASK_* bits of the fields to leave OUT; the function works with
 *            the complement (mask = ~diff_mask), so a field guarded by
 *            "mask & MASK_x" is printed only when MASK_x is not set in
 *            diff_mask.
 *
 * After each snprintf() the function returns early if formatting failed
 * (len < 0) or the text reached the end of the buffer
 * (len+offset >= maxlen-1).
 *
 * The oversampling factor sigma is labelled "$\sigma$=", matching the other
 * title format strings in this file; it used to be emitted with the label
 * "N=", which duplicated the label of the N field.
 *
 * NOTE(review): the guards of the d, NFFT, N and M fields and the updates of
 * offset are not visible here.
 */
254 void get_plot_title(
char *outstr,
int maxlen,
char *hostname,
s_param param,
unsigned int diff_mask)
256 unsigned int mask = ~diff_mask;
260 len = snprintf(outstr, maxlen,
"%s", hostname);
261 if (len < 0 || len+offset >= maxlen-1)
return;
266 len = snprintf(outstr+offset, maxlen-offset,
" %dd", param.d);
267 if (len < 0 || len+offset >= maxlen-1)
return;
/* "^\top" marks the adjoint transform in the TeX title. */
273 len = snprintf(outstr+offset, maxlen-offset,
" $\\mathrm{NFFT}%s$", param.trafo_adjoint==0?
"":
"^\\top");
274 if (len < 0 || len+offset >= maxlen-1)
return;
280 len = snprintf(outstr+offset, maxlen-offset,
" N=%d", param.N);
281 if (len < 0 || len+offset >= maxlen-1)
return;
285 if (mask & MASK_SIGMA)
/* Fixed label: sigma is no longer printed as "N=". */
287 len = snprintf(outstr+offset, maxlen-offset,
" $\\sigma$=%g", param.sigma);
288 if (len < 0 || len+offset >= maxlen-1)
return;
294 len = snprintf(outstr+offset, maxlen-offset,
" M=%d", param.M);
295 if (len < 0 || len+offset >= maxlen-1)
return;
299 if (mask & MASK_WINM)
301 len = snprintf(outstr+offset, maxlen-offset,
" m=%d", param.m);
302 if (len < 0 || len+offset >= maxlen-1)
return;
306 if (mask & MASK_FLAGS_PSI)
308 len = snprintf(outstr+offset, maxlen-offset,
" %s", get_psi_string(param.flags));
309 if (len < 0 || len+offset >= maxlen-1)
return;
313 if (mask & MASK_FLAGS_SORT)
315 len = snprintf(outstr+offset, maxlen-offset,
" %s", get_sort_string(param.flags));
316 if (len < 0 || len+offset >= maxlen-1)
return;
/* The blockwise-adjoint label is skipped when it is the empty string, so no
 * stray separator blank is appended. */
320 if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.flags)) > 0)
322 len = snprintf(outstr+offset, maxlen-offset,
" %s", get_adjoint_omp_string(param.flags));
323 if (len < 0 || len+offset >= maxlen-1)
return;
/*
 * Writes one pgfplots speedup diagram (a tikzpicture with a single axis) for
 * a group of test sets to out and echoes the plotted numbers to stderr.
 *
 * out:       stream receiving the TeX code.
 * testsets:  array of ntestsets test sets; testsets[0] supplies the values
 *            for the plot title, so the array must not be empty.
 * ntestsets: number of test sets, one \addplot each.
 * tref:      reference time; each plotted speedup is
 *            tref / results[i].resval[5].avg.
 *
 * The title is built with diff_mask | MASK_FLAGS_SORT and the legend entries
 * with the complement of that value, so get_plot_title() puts the parameters
 * shared by all test sets into the title and the remaining ones (always
 * including the sort label) into the legend.
 */
328 void print_output_speedup_total_tref(FILE *out,
s_testset *testsets,
int ntestsets,
double tref)
332 char plottitle[1025];
333 unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
/* Fall back to a fixed name when the host name cannot be obtained. */
335 if (gethostname(hostname, 1024) != 0)
336 strncpy(hostname,
"unnamed", 1024);
338 get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask | MASK_FLAGS_SORT);
340 fprintf(out,
"\\begin{tikzpicture}\n");
341 fprintf(out,
"\\begin{axis}[");
342 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
343 fprintf(out,
" title={%s}", plottitle);
344 fprintf(out,
" ]\n");
346 for (t = 0; t < ntestsets; t++)
/* Diagnostic line on stderr with the full parameter set of this test set
 * (the closing '}' is part of the emitted text). */
349 fprintf(stderr,
"%s %dd $\\mathrm{NFFT}%s$ N=%d $\\sigma$=%g M=%d m=%d %s %s %s}", hostname, testset.param.d, testset.param.trafo_adjoint==0?
"":
"^\\top", testset.param.N, testset.param.sigma, testset.param.M, testset.param.m, get_psi_string(testset.param.flags), get_sort_string(testset.param.flags), get_adjoint_omp_string(testset.param.flags));
350 fprintf(stderr,
"\n");
/* One coordinate (thread count, speedup) per result of this test set. */
352 fprintf(out,
"\\addplot coordinates {");
353 for (i = 0; i < testset.nresults; i++)
354 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
355 fprintf(out,
"};\n");
/* Same numbers again, in short form, on stderr. */
357 for (i = 0; i < testset.nresults; i++)
359 fprintf(stderr,
"%d:%.3f ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
361 fprintf(stderr,
"\n\n");
/* Legend: one label per test set. NOTE(review): the separator written
 * between consecutive labels is not visible here. */
364 fprintf(out,
"\\legend{{");
365 for (t = 0; t < ntestsets; t++)
370 get_plot_title(title, 255,
"", testsets[t].param, ~(diff_mask | MASK_FLAGS_SORT));
371 fprintf(out,
"%s", title);
373 fprintf(out,
"}}\n");
374 fprintf(out,
"\\end{axis}\n");
375 fprintf(out,
"\\end{tikzpicture}\n");
376 fprintf(out,
"\n\n");
/*
 * Determines the reference time for a group of test sets and prints the
 * speedup diagram relative to it.
 *
 * The reference time is the smallest resval[5].avg among all results that
 * were measured with nthreads == 1, taken over every test set in the group.
 * tref starts at 1.0/0.0 (+infinity under IEEE 754 arithmetic) and keeps
 * that value if no single-thread result exists.
 *
 * out:       stream receiving the TeX code.
 * testsets:  array of ntestsets test sets.
 * ntestsets: number of test sets in the group.
 */
381 void print_output_speedup_total(FILE *out,
s_testset *testsets,
int ntestsets)
383 double tref = 1.0/0.0;
386 for (t = 0; t < ntestsets; t++)
387 for (k = 0; k < testsets[t].nresults; k++)
388 if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
389 tref = testsets[t].results[k].resval[5].avg;
391 print_output_speedup_total_tref(out, testsets, ntestsets, tref);
/*
 * Writes a pgfplots bar chart (ybar) of the averaged timing components of a
 * single test set to out.
 *
 * out:     stream receiving the TeX code.
 * testset: test set to plot (passed by value); one bar group per entry of
 *          testset.results, labelled with its thread count.
 *
 * Five data series are emitted, in the order named by the \legend line
 * "D,F,B,rest,total":
 *   resval[1].avg, resval[2].avg, resval[3].avg,
 *   resval[0].avg + resval[4].avg, resval[5].avg.
 */
394 void print_output_histo_DFBRT(FILE *out,
s_testset testset)
396 int i, size = testset.nresults;
/* Fall back to a fixed name when the host name cannot be obtained. */
399 if (gethostname(hostname, 1024) != 0)
400 strncpy(hostname,
"unnamed", 1024);
402 fprintf(out,
"\\begin{tikzpicture}\n");
403 fprintf(out,
"\\begin{axis}[");
404 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
/* The thread counts become the symbolic x coordinates.
 * NOTE(review): presumably the ",%d" form is used for every entry but the
 * first; the selecting condition is not visible here. */
405 fprintf(out,
"symbolic x coords={");
406 for (i = 0; i < size; i++)
408 fprintf(out,
",%d", testset.results[i].nthreads);
410 fprintf(out,
"%d", testset.results[i].nthreads);
/* Diagnostic output of the raw flag word. */
411 fprintf(stderr,
"FLAGS: %d\n", testset.param.flags);
413 fprintf(out,
"}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
414 fprintf(out,
" title={%s %dd $\\mathrm{NFFT}%s$ N=%d $\\sigma$=%g M=%d m=%d %s %s %s}", hostname, testset.param.d, testset.param.trafo_adjoint==0?
"":
"^\\top", testset.param.N, testset.param.sigma, testset.param.M, testset.param.m, get_psi_string(testset.param.flags), get_sort_string(testset.param.flags), get_adjoint_omp_string(testset.param.flags));
415 fprintf(out,
" ]\n");
/* Series 1 (legend entry "D"): resval[1]. */
416 fprintf(out,
"\\addplot coordinates {");
417 for (i = 0; i < size; i++)
418 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
419 fprintf(out,
"};\n");
/* Series 2 (legend entry "F"): resval[2]. */
421 fprintf(out,
"\\addplot coordinates {");
422 for (i = 0; i < size; i++)
423 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
424 fprintf(out,
"};\n");
/* Series 3 (legend entry "B"): resval[3]. */
426 fprintf(out,
"\\addplot coordinates {");
427 for (i = 0; i < size; i++)
428 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
429 fprintf(out,
"};\n");
/* Series 4 (legend entry "rest"): sum of resval[0] and resval[4]. */
431 fprintf(out,
"\\addplot coordinates {");
432 for (i = 0; i < size; i++)
433 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
434 fprintf(out,
"};\n");
/* Series 5 (legend entry "total"): resval[5]. */
436 fprintf(out,
"\\addplot coordinates {");
437 for (i = 0; i < size; i++)
438 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
439 fprintf(out,
"};\n");
440 fprintf(out,
"\\legend{D,F,B,rest,total}\n");
441 fprintf(out,
"\\end{axis}\n");
442 fprintf(out,
"\\end{tikzpicture}\n");
443 fprintf(out,
"\n\n");
/*
 * Fills one test set: records the parameters, creates the input data set and
 * runs the benchmark once per requested thread count.
 *
 * testset:              test set to fill; its param, results and nresults
 *                       members are overwritten.
 * d, trafo_adjoint, N, M, sigma, m, flags:
 *                       benchmark parameters, copied into testset->param.
 * nthreads_array:       thread counts to benchmark.
 * n_threads_array_size: number of entries in nthreads_array; also the number
 *                       of s_result entries allocated for testset->results.
 *
 * testset->results is obtained from malloc() and is not released here.
 * NOTE(review): the malloc() result is used without a NULL check.
 */
448 void run_testset(
s_testset *testset,
int d,
int trafo_adjoint,
int N,
int M,
double sigma,
int m,
int flags,
int *nthreads_array,
int n_threads_array_size)
451 testset->param.d = d;
452 testset->param.trafo_adjoint = trafo_adjoint;
453 testset->param.N = N;
454 testset->param.M = M;
455 testset->param.sigma = sigma;
456 testset->param.m = m;
457 testset->param.flags = flags;
459 testset->results = (
s_result*) malloc(n_threads_array_size*
sizeof(
s_result));
460 testset->nresults = n_threads_array_size;
/* The data set depends only on d, trafo_adjoint, N, M and sigma, so it is
 * generated once and shared by all thread counts. */
462 run_test_create(testset->param.d, testset->param.trafo_adjoint, testset->param.N, testset->param.M, testset->param.sigma);
463 for (i = 0; i < n_threads_array_size; i++)
465 testset->results[i].nthreads = nthreads_array[i];
/* nthreads was stored on the previous line; pass the stored value instead of
 * repeating the assignment inside the argument list. */
466 run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.flags, testset->results[i].nthreads);
/*
 * Runs the complete benchmark series for one value of m and writes the
 * resulting plots to file_out_tex.
 *
 * nthreads_array / n_threads_array_size: thread counts handed on to every
 *                                        run_testset() call.
 * m:                                     forwarded as the m argument of
 *                                        every run_testset() call.
 *
 * Fifteen test sets are run, five per dimension d = 1, 2, 3:
 *   - trafo_adjoint = 0 with flags 0 and NFFT_SORT_NODES, followed by a
 *     speedup plot over these two;
 *   - trafo_adjoint = 1 with flags 0, NFFT_SORT_NODES and
 *     NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, followed by a speedup
 *     plot over these three.
 * Sizes are N = M = 2097152 for d = 1, N = 1024 and M = 1048576 for d = 2,
 * and N = 128 and M = 2097152 for d = 3; sigma is always 2.0.
 *
 * The per-test-set histogram is emitted only when both MEASURE_TIME and
 * MEASURE_TIME_FFTW are defined.
 * NOTE(review): the declaration of testsets and the #endif lines are not
 * visible here; no release of the results arrays allocated by run_testset()
 * is visible either.
 */
471 void test1(
int *nthreads_array,
int n_threads_array_size,
int m)
/* d = 1, forward transform */
475 run_testset(&testsets[0], 1, 0, 2097152, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
476 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
477 print_output_histo_DFBRT(file_out_tex, testsets[0]);
480 run_testset(&testsets[1], 1, 0, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
481 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
482 print_output_histo_DFBRT(file_out_tex, testsets[1]);
485 print_output_speedup_total(file_out_tex, testsets, 2);
/* d = 1, adjoint transform */
487 run_testset(&testsets[2], 1, 1, 2097152, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
488 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
489 print_output_histo_DFBRT(file_out_tex, testsets[2]);
492 run_testset(&testsets[3], 1, 1, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
493 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
494 print_output_histo_DFBRT(file_out_tex, testsets[3]);
497 run_testset(&testsets[4], 1, 1, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
498 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
499 print_output_histo_DFBRT(file_out_tex, testsets[4]);
502 print_output_speedup_total(file_out_tex, testsets+2, 3);
/* d = 2, forward transform */
504 run_testset(&testsets[5], 2, 0, 1024, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
505 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
506 print_output_histo_DFBRT(file_out_tex, testsets[5]);
509 run_testset(&testsets[6], 2, 0, 1024, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
510 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
511 print_output_histo_DFBRT(file_out_tex, testsets[6]);
514 print_output_speedup_total(file_out_tex, testsets+5, 2);
/* d = 2, adjoint transform */
516 run_testset(&testsets[7], 2, 1, 1024, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
517 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
518 print_output_histo_DFBRT(file_out_tex, testsets[7]);
521 run_testset(&testsets[8], 2, 1, 1024, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
522 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
523 print_output_histo_DFBRT(file_out_tex, testsets[8]);
526 run_testset(&testsets[9], 2, 1, 1024, 1048576, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
527 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
528 print_output_histo_DFBRT(file_out_tex, testsets[9]);
531 print_output_speedup_total(file_out_tex, testsets+7, 3);
/* d = 3, forward transform */
533 run_testset(&testsets[10], 3, 0, 128, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
534 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
535 print_output_histo_DFBRT(file_out_tex, testsets[10]);
538 run_testset(&testsets[11], 3, 0, 128, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
539 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
540 print_output_histo_DFBRT(file_out_tex, testsets[11]);
543 print_output_speedup_total(file_out_tex, testsets+10, 2);
/* d = 3, adjoint transform */
545 run_testset(&testsets[12], 3, 1, 128, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
546 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
547 print_output_histo_DFBRT(file_out_tex, testsets[12]);
550 run_testset(&testsets[13], 3, 1, 128, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
551 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
552 print_output_histo_DFBRT(file_out_tex, testsets[13]);
555 run_testset(&testsets[14], 3, 1, 128, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
556 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
557 print_output_histo_DFBRT(file_out_tex, testsets[14]);
560 print_output_speedup_total(file_out_tex, testsets+12, 3);
564 int main(
int argc,
char** argv)
567 int n_threads_array_size = get_nthreads_array(&nthreads_array);
570 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
571 fprintf(stderr,
"WARNING: Detailed time measurements for NFFT are not activated.\n");
572 fprintf(stderr,
"For more detailed plots, please re-run the configure script with options\n");
573 fprintf(stderr,
"--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
574 fprintf(stderr,
"and run \"make clean all\"\n\n");
577 for (k = 0; k < n_threads_array_size; k++)
578 fprintf(stderr,
"%d ", nthreads_array[k]);
579 fprintf(stderr,
"\n");
581 file_out_tex = fopen(
"nfft_benchomp_results_plots.tex",
"w");
583 test1(nthreads_array, n_threads_array_size, 2);
584 test1(nthreads_array, n_threads_array_size, 4);
585 test1(nthreads_array, n_threads_array_size, 6);
587 fclose(file_out_tex);