/* Stream receiving the generated TeX/pgfplots output. main() opens
 * "fastsum_benchomp_results_plots.tex" into it and closes it at the end;
 * test1() hands it to the plot-printing routines. */
30 static FILE* file_out_tex = NULL;
/**
 * Builds the list of thread counts to benchmark and hands it back through
 * *arr as a freshly malloc'ed int array.
 *
 * The largest thread count considered is nfft_get_omp_num_threads().
 *
 * @param arr  out-parameter receiving the allocated array
 * @return NOTE(review): the return statements are not visible in this
 *         excerpt; main() treats the result as the number of entries in
 *         *arr - confirm.
 */
32 int get_nthreads_array(
int **arr)
34 int max_threads = nfft_get_omp_num_threads();
/* 1 when max_threads is even, 0 otherwise: the integer division makes this a
 * divisibility-by-2 test, not a power-of-two test, despite the name. */
38 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
/* Variant that reserves one slot per index 0..max_threads-1.
 * NOTE(review): the condition selecting this path and the loop body are not
 * visible in this excerpt. */
42 *arr = (
int*) malloc(max_threads*
sizeof(
int));
43 for (k = 0; k < max_threads; k++)
/* Empty-bodied loop: bumps alloc_num once per power of two <= max_threads
 * (the starting value of alloc_num is not visible here). */
48 for (k = 1; k <= max_threads; k*=2, alloc_num++);
50 *arr = (
int*) malloc(alloc_num*
sizeof(
int));
/* Walk the powers of two k <= max_threads and fill the array; ret_number is
 * the write index (its updates are not visible in this excerpt). */
52 for (k = 1; k <= max_threads; k*=2)
/* k is the last power of two below an even, non-power-of-two max_threads:
 * store max_threads/2 ahead of k. */
54 if (k != max_threads && 2*k > max_threads && max_threads_pw2)
56 *(*arr + ret_number) = max_threads/2;
/* The power of two itself is stored outside the two conditionals visible here. */
60 *(*arr + ret_number) = k;
/* max_threads is not a power of two: store it after the last power of two. */
63 if (k != max_threads && 2*k > max_threads)
65 *(*arr + ret_number) = max_threads;
/**
 * Reports on stderr that val differs from the expected value ok, labelled
 * with msg ("ERROR <msg>: <val> not <ok>").
 *
 * NOTE(review): the comparison guarding the message, and whatever happens
 * after it (e.g. aborting), are not visible in this excerpt.
 *
 * @param val  value actually obtained
 * @param ok   value that was expected
 * @param msg  label identifying the operation being checked
 */
75 void check_result_value(
const int val,
const int ok,
const char *msg)
79 fprintf(stderr,
"ERROR %s: %d not %d\n", msg, val, ok);
/**
 * Generates the benchmark input by running the external program
 * ./fastsum_benchomp_createdataset with the arguments d, L and M and
 * redirecting its stdout to fastsum_benchomp_test.data.
 *
 * The command line is echoed on stderr before it is executed, and the value
 * returned by system() is checked against 0 via check_result_value().
 *
 * @param d  first argument passed to the dataset generator
 * @param L  second argument passed to the dataset generator
 * @param M  third argument passed to the dataset generator
 */
85 void run_test_create(
int d,
int L,
int M)
/* cmd is formatted with a limit of 1024 bytes (its declaration is not visible here). */
89 snprintf(cmd, 1024,
"./fastsum_benchomp_createdataset %d %d %d > fastsum_benchomp_test.data", d, L, M);
90 fprintf(stderr,
"%s\n", cmd);
91 check_result_value(system(cmd), 0,
"createdataset");
/**
 * Opens "fastsum_benchomp_test.result" in write mode, which creates the file
 * or truncates an existing one.
 *
 * NOTE(review): what is done with the stream afterwards (writing, closing)
 * is not visible in this excerpt.
 */
94 void run_test_init_output()
96 FILE *f = fopen(
"fastsum_benchomp_test.result",
"w");
/**
 * Runs one benchmark configuration nrepeat times through an external helper
 * program and collects statistics for 16 timing values in res[0..15].
 *
 * Each run writes its timings to fastsum_benchomp_test.out, from which 16
 * doubles are read back.
 *
 * @param res          array of at least 16 entries receiving avg/min/max
 * @param nrepeat      number of measured repetitions
 * @param n            forwarded as 1st program argument
 * @param m            forwarded as 2nd program argument
 * @param p            forwarded as 3rd program argument
 * @param kernel_name  forwarded as 4th program argument
 * @param c            forwarded as 5th program argument
 * @param eps_I        forwarded as 6th program argument
 * @param eps_B        forwarded as 7th program argument
 * @param nthreads     thread count; forwarded only to the "_threads" helper
 */
135 void run_test(
s_resval *res,
int nrepeat,
int n,
int m,
int p,
char *kernel_name,
double c,
double eps_I,
double eps_B,
int nthreads)
/* Reset the statistics; 1.0/0.0 is used as +infinity so any sample lowers min. */
140 for (t = 0; t < 16; t++)
142 res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
/* Two alternative command lines: the "_single" helper and the "_threads"
 * helper, which additionally receives nthreads.
 * NOTE(review): the condition choosing between them is not visible here. */
146 snprintf(cmd, 1024,
"./fastsum_benchomp_detail_single %d %d %d %s %lg %lg %lg < fastsum_benchomp_test.data > fastsum_benchomp_test.out", n, m, p, kernel_name, c, eps_I, eps_B);
148 snprintf(cmd, 1024,
"./fastsum_benchomp_detail_threads %d %d %d %s %lg %lg %lg %d < fastsum_benchomp_test.data > fastsum_benchomp_test.out", n, m, p, kernel_name, c, eps_I, eps_B, nthreads);
149 fprintf(stderr,
"%s\n", cmd);
/* One execution ahead of the repeat loop; no output of this run is read in the visible lines. */
150 check_result_value(system(cmd), 0, cmd);
152 for (r = 0; r < nrepeat; r++)
157 check_result_value(system(cmd), 0, cmd);
158 f = fopen(
"fastsum_benchomp_test.out",
"r");
/* Expect exactly 16 values per run; fscanf's conversion count is verified below. */
159 retval = fscanf(f,
"%lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5, v+6, v+7, v+8, v+9, v+10, v+11, v+12, v+13, v+14, v+15);
160 check_result_value(retval, 16,
"read fastsum_benchomp_test.out");
/* Per-value update of the running statistics.
 * NOTE(review): the assignments (and presumably the accumulation into avg)
 * are not visible in this excerpt. */
163 for (t = 0; t < 16; t++)
166 if (res[t].min > v[t])
168 if (res[t].max < v[t])
/* Divide avg by the number of repetitions. */
173 for (t = 0; t < 16; t++)
174 res[t].avg /= nrepeat;
/* Echo a one-line summary (avg, min, max for each of the 16 values) on stderr. */
176 fprintf(stderr,
"%d %d: ", nthreads, nrepeat);
177 for (t = 0; t < 16; t++)
178 fprintf(stderr,
"%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
179 fprintf(stderr,
"\n");
/**
 * Returns a constant string selected by bits in flags.
 *
 * NOTE(review): only the PRE_ONE_PSI test is visible in this excerpt; the
 * preceding branch and the returned strings are not shown. Presumably this
 * names the psi precomputation mode - confirm.
 *
 * @param flags  bit mask to inspect
 */
182 const char *get_psi_string(
int flags)
186 else if (flags & PRE_ONE_PSI)
/**
 * Returns a constant string selected by bits in flags.
 *
 * NFFT_OMP_BLOCKWISE_ADJOINT is tested before NFFT_SORT_NODES.
 * NOTE(review): the returned strings are not visible in this excerpt.
 *
 * @param flags  bit mask to inspect
 */
191 const char *get_sort_string(
int flags)
193 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
196 if (flags & NFFT_SORT_NODES)
/**
 * Returns a constant string depending on whether NFFT_OMP_BLOCKWISE_ADJOINT
 * is set in flags.
 *
 * NOTE(review): the returned strings are not visible in this excerpt.
 *
 * @param flags  bit mask to inspect
 */
202 const char *get_adjoint_omp_string(
int flags)
204 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
/* Bit flags, one per fastsum test parameter. They are combined into the mask
 * built by fastsum_determine_different_parameters() and tested by
 * fastsum_get_plot_title_minus_indep() to decide which parameters to print.
 * Parameter associated with each flag in those two functions:
 *   MASK_FSUM_D       - param.d
 *   MASK_FSUM_L       - param.L
 *   MASK_FSUM_M       - param.M
 *   MASK_FSUM_MULTIBW - param.n
 *   MASK_FSUM_WINM    - param.m
 *   MASK_FSUM_P       - param.p
 *   MASK_FSUM_KERNEL  - param.kernel_name
 *   MASK_FSUM_EPSI    - param.eps_I
 *   MASK_FSUM_EPSB    - param.eps_B
 */
210 #define MASK_FSUM_D (1U<<0)
211 #define MASK_FSUM_L (1U<<1)
212 #define MASK_FSUM_M (1U<<2)
213 #define MASK_FSUM_MULTIBW (1U<<3)
214 #define MASK_FSUM_WINM (1U<<4)
215 #define MASK_FSUM_P (1U<<5)
216 #define MASK_FSUM_KERNEL (1U<<6)
217 #define MASK_FSUM_EPSI (1U<<7)
218 #define MASK_FSUM_EPSB (1U<<8)
/**
 * Determines which parameters vary across a series of testsets by comparing
 * each testset with its predecessor, and sets the matching MASK_FSUM_* bit
 * for every parameter that differs.
 *
 * With fewer than two testsets the loop does not execute and mask keeps its
 * initial value 0.
 *
 * @param testsets   array of testsets to compare
 * @param ntestsets  number of entries in testsets
 * @return NOTE(review): the return statement is not visible here; the caller
 *         uses the result as the difference mask - confirm.
 */
220 unsigned int fastsum_determine_different_parameters(
s_testset *testsets,
int ntestsets)
223 unsigned int mask = 0;
/* Start at 1 so that every iteration has a predecessor t-1 to compare with. */
228 for (t = 1; t < ntestsets; t++)
/* NOTE(review): the statements executed for d, L, M and p are not visible here. */
230 if (testsets[t-1].param.d != testsets[t].param.d)
232 if (testsets[t-1].param.L != testsets[t].param.L)
234 if (testsets[t-1].param.M != testsets[t].param.M)
236 if (testsets[t-1].param.n != testsets[t].param.n)
237 mask |= MASK_FSUM_MULTIBW;
238 if (testsets[t-1].param.m != testsets[t].param.m)
239 mask |= MASK_FSUM_WINM;
240 if (testsets[t-1].param.p != testsets[t].param.p)
/* Kernel names are compared by content, not by pointer. */
242 if (strcmp(testsets[t-1].param.kernel_name, testsets[t].param.kernel_name) != 0)
243 mask |= MASK_FSUM_KERNEL;
/* The eps values are compared for exact floating-point inequality. */
244 if (testsets[t-1].param.eps_I != testsets[t].param.eps_I)
245 mask |= MASK_FSUM_EPSI;
246 if (testsets[t-1].param.eps_B != testsets[t].param.eps_B)
247 mask |= MASK_FSUM_EPSB;
/**
 * Copies src into dst, writing either the TeX sequence "\_{}" or the source
 * character itself for each input character.
 *
 * NOTE(review): the test choosing between the two outputs is not visible in
 * this excerpt; going by the function name it is presumably src[i] == '_'.
 *
 * Copying stops at the end of src or once the loop guard
 * len + offset < maxlen - 1 no longer holds.
 *
 * @param dst     destination buffer
 * @param src     NUL-terminated input string
 * @param maxlen  size limit passed to snprintf for dst
 */
253 void strEscapeUnderscore(
char *dst,
char *src,
int maxlen)
259 while (src[i] !=
'\0' && len + offset < maxlen - 1)
/* Escaped form: backslash, underscore, empty TeX group. */
262 len = snprintf(dst+offset, maxlen-offset,
"\\_{}");
/* Any other character is copied through unchanged. */
264 len = snprintf(dst+offset, maxlen-offset,
"%c", src[i]);
/**
 * Assembles a plot title in outstr: the host name followed by a description
 * of every parameter whose MASK_FSUM_* bit is NOT set in diff_mask.
 *
 * The text is appended piece by piece with snprintf; after each piece the
 * function returns early if snprintf failed or the piece would reach the end
 * of the buffer (len+offset >= maxlen-1).
 *
 * NOTE(review): the updates of offset between pieces and the else-structure
 * linking the combined and individual L/M and eps cases are not visible in
 * this excerpt.
 *
 * @param outstr     destination buffer for the title
 * @param maxlen     size limit used for outstr
 * @param hostname   text placed at the start of the title
 * @param param      parameter set whose values are printed
 * @param diff_mask  MASK_FSUM_* bits of the parameters to leave out
 */
270 void fastsum_get_plot_title_minus_indep(
char *outstr,
int maxlen,
char *hostname,
s_param param,
unsigned int diff_mask)
/* Invert: from here on a set bit means "include this parameter". */
272 unsigned int mask = ~diff_mask;
276 len = snprintf(outstr, maxlen,
"%s", hostname);
277 if (len < 0 || len+offset >= maxlen-1)
return;
280 if (mask & MASK_FSUM_D)
282 len = snprintf(outstr+offset, maxlen-offset,
" %dd fastsum", param.d);
283 if (len < 0 || len+offset >= maxlen-1)
return;
/* Combined "L=M=" form: taken when at least one of the L/M bits is selected
 * and both values coincide. */
287 if ((mask & (MASK_FSUM_L | MASK_FSUM_M)) && param.L == param.M)
289 len = snprintf(outstr+offset, maxlen-offset,
" L=M=%d", param.L);
290 if (len < 0 || len+offset >= maxlen-1)
return;
295 if (mask & MASK_FSUM_L)
297 len = snprintf(outstr+offset, maxlen-offset,
" L=%d", param.L);
298 if (len < 0 || len+offset >= maxlen-1)
return;
302 if (mask & MASK_FSUM_M)
304 len = snprintf(outstr+offset, maxlen-offset,
" M=%d", param.M);
305 if (len < 0 || len+offset >= maxlen-1)
return;
310 if (mask & MASK_FSUM_MULTIBW)
312 len = snprintf(outstr+offset, maxlen-offset,
" n=%d", param.n);
313 if (len < 0 || len+offset >= maxlen-1)
return;
317 if (mask & MASK_FSUM_WINM)
319 len = snprintf(outstr+offset, maxlen-offset,
" m=%d", param.m);
320 if (len < 0 || len+offset >= maxlen-1)
return;
324 if (mask & MASK_FSUM_P)
326 len = snprintf(outstr+offset, maxlen-offset,
" p=%d", param.p);
327 if (len < 0 || len+offset >= maxlen-1)
return;
331 if (mask & MASK_FSUM_KERNEL)
/* The kernel name is passed through strEscapeUnderscore before being printed.
 * NOTE(review): tmp is written with limit maxlen; its declaration is not
 * visible here - confirm it is at least that large. */
334 strEscapeUnderscore(tmp, param.kernel_name, maxlen);
336 len = snprintf(outstr+offset, maxlen-offset,
" %s", tmp);
337 if (len < 0 || len+offset >= maxlen-1)
return;
/* Combined epsilon form, analogous to the L=M case above (exact double comparison). */
341 if ((mask & (MASK_FSUM_EPSI | MASK_FSUM_EPSB)) && param.eps_I == param.eps_B)
343 len = snprintf(outstr+offset, maxlen-offset,
" $\\varepsilon_\\mathrm{I}$=$\\varepsilon_\\mathrm{B}$=%g", param.eps_I);
344 if (len < 0 || len+offset >= maxlen-1)
return;
349 if (mask & MASK_FSUM_EPSI)
351 len = snprintf(outstr+offset, maxlen-offset,
" $\\varepsilon_\\mathrm{I}$=%g", param.eps_I);
352 if (len < 0 || len+offset >= maxlen-1)
return;
356 if (mask & MASK_FSUM_EPSB)
358 len = snprintf(outstr+offset, maxlen-offset,
" $\\varepsilon_\\mathrm{B}$=%g", param.eps_B);
359 if (len < 0 || len+offset >= maxlen-1)
return;
/**
 * Writes a TikZ/pgfplots bar chart ("ybar") of average timings over the
 * number of threads to out, titled as the adjoint NFFT.
 *
 * Five series are emitted, in legend order D, F^T, B^T, prepsi, total:
 * resval[10], resval[11], resval[12], resval[1], and resval[4] + resval[1]
 * (averages in each case).
 *
 * @param out      stream receiving the TeX code
 * @param testset  testset whose results and parameters are plotted
 */
365 void nfft_adjoint_print_output_histo_DFBRT(FILE *out,
s_testset testset)
367 int i, size = testset.nresults;
/* Fall back to a fixed name when the host name cannot be obtained. */
370 if (gethostname(hostname, 1024) != 0)
371 strncpy(hostname,
"unnamed", 1024);
373 fprintf(out,
"\\begin{tikzpicture}\n");
374 fprintf(out,
"\\begin{axis}[");
375 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
/* The thread counts serve as symbolic x coordinates, comma separated.
 * NOTE(review): the test choosing between the ",%d" and "%d" forms is not
 * visible in this excerpt. */
376 fprintf(out,
"symbolic x coords={");
377 for (i = 0; i < size; i++)
379 fprintf(out,
",%d", testset.results[i].nthreads);
381 fprintf(out,
"%d", testset.results[i].nthreads);
383 fprintf(out,
"}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
/* The "N=" field of the title is filled from param.n. */
384 fprintf(out,
" title={%s %dd $\\textrm{NFFT}^\\top$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}", hostname, testset.param.d, testset.param.n, testset.param.M, testset.param.m);
385 fprintf(out,
" ]\n");
/* Series "D": resval[10]. */
386 fprintf(out,
"\\addplot coordinates {");
387 for (i = 0; i < size; i++)
388 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[10].avg);
389 fprintf(out,
"};\n");
/* Series "F^T": resval[11]. */
391 fprintf(out,
"\\addplot coordinates {");
392 for (i = 0; i < size; i++)
393 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[11].avg);
394 fprintf(out,
"};\n");
/* Series "B^T": resval[12]. */
396 fprintf(out,
"\\addplot coordinates {");
397 for (i = 0; i < size; i++)
398 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[12].avg);
399 fprintf(out,
"};\n");
/* Series "prepsi": resval[1]. */
401 fprintf(out,
"\\addplot coordinates {");
402 for (i = 0; i < size; i++)
403 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
404 fprintf(out,
"};\n");
/* Series "total": resval[4] plus resval[1]. */
406 fprintf(out,
"\\addplot coordinates {");
407 for (i = 0; i < size; i++)
408 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[4].avg + testset.results[i].resval[1].avg);
409 fprintf(out,
"};\n");
410 fprintf(out,
"\\legend{D,$\\textrm{F}^\\top$,$\\textrm{B}^\\top$,prepsi,total}\n");
411 fprintf(out,
"\\end{axis}\n");
412 fprintf(out,
"\\end{tikzpicture}\n");
413 fprintf(out,
"\n\n");
/**
 * Writes a TikZ/pgfplots bar chart ("ybar") of average timings over the
 * number of threads to out, titled as the (forward) NFFT.
 *
 * Five series are emitted, in legend order D, F, B, prepsi, total:
 * resval[13], resval[14], resval[15], resval[2], and resval[6] + resval[2]
 * (averages in each case).
 *
 * @param out      stream receiving the TeX code
 * @param testset  testset whose results and parameters are plotted
 */
418 void nfft_trafo_print_output_histo_DFBRT(FILE *out,
s_testset testset)
420 int i, size = testset.nresults;
/* Fall back to a fixed name when the host name cannot be obtained. */
423 if (gethostname(hostname, 1024) != 0)
424 strncpy(hostname,
"unnamed", 1024);
426 fprintf(out,
"\\begin{tikzpicture}\n");
427 fprintf(out,
"\\begin{axis}[");
428 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
/* The thread counts serve as symbolic x coordinates, comma separated.
 * NOTE(review): the test choosing between the ",%d" and "%d" forms is not
 * visible in this excerpt. */
429 fprintf(out,
"symbolic x coords={");
430 for (i = 0; i < size; i++)
432 fprintf(out,
",%d", testset.results[i].nthreads);
434 fprintf(out,
"%d", testset.results[i].nthreads);
436 fprintf(out,
"}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
/* The "N=" field of the title is filled from param.n. */
437 fprintf(out,
" title={%s %dd $\\textrm{NFFT}$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}", hostname, testset.param.d, testset.param.n, testset.param.M, testset.param.m);
438 fprintf(out,
" ]\n");
/* Series "D": resval[13]. */
439 fprintf(out,
"\\addplot coordinates {");
440 for (i = 0; i < size; i++)
441 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[13].avg);
442 fprintf(out,
"};\n");
/* Series "F": resval[14]. */
444 fprintf(out,
"\\addplot coordinates {");
445 for (i = 0; i < size; i++)
446 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[14].avg);
447 fprintf(out,
"};\n");
/* Series "B": resval[15]. */
449 fprintf(out,
"\\addplot coordinates {");
450 for (i = 0; i < size; i++)
451 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[15].avg);
452 fprintf(out,
"};\n");
/* Series "prepsi": resval[2]. */
454 fprintf(out,
"\\addplot coordinates {");
455 for (i = 0; i < size; i++)
456 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
457 fprintf(out,
"};\n");
/* Series "total": resval[6] plus resval[2]. */
459 fprintf(out,
"\\addplot coordinates {");
460 for (i = 0; i < size; i++)
461 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[6].avg + testset.results[i].resval[2].avg);
462 fprintf(out,
"};\n");
463 fprintf(out,
"\\legend{D,F,B,prepsi,total}\n");
464 fprintf(out,
"\\end{axis}\n");
465 fprintf(out,
"\\end{tikzpicture}\n");
466 fprintf(out,
"\n\n");
/**
 * Writes a TikZ/pgfplots bar chart ("ybar") of the average fastsum timings
 * over the number of threads to out.
 *
 * The title is produced by fastsum_get_plot_title_minus_indep() with a
 * difference mask of 0, i.e. no parameter is left out. Five series are
 * emitted, in legend order:
 *   prepsi (step 1b)          resval[1] + resval[2]
 *   init nearfield (step 1c)  resval[3]
 *   far field (steps 2a-c)    resval[4] + resval[5] + resval[6]
 *   nearfield (step 2d)       resval[7]
 *   total - step 1a           resval[9] - resval[0]
 *
 * @param out      stream receiving the TeX code
 * @param testset  testset whose results and parameters are plotted
 */
471 void fastsum_print_output_histo_PreRfNfT(FILE *out,
s_testset testset)
473 int i, size = testset.nresults;
475 char plottitle[1025];
/* Fall back to a fixed name when the host name cannot be obtained. */
477 if (gethostname(hostname, 1024) != 0)
478 strncpy(hostname,
"unnamed", 1024);
480 fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testset.param, 0);
482 fprintf(out,
"\\begin{tikzpicture}\n");
483 fprintf(out,
"\\begin{axis}[");
484 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
/* The thread counts serve as symbolic x coordinates, comma separated.
 * NOTE(review): the test choosing between the ",%d" and "%d" forms is not
 * visible in this excerpt. */
485 fprintf(out,
"symbolic x coords={");
486 for (i = 0; i < size; i++)
488 fprintf(out,
",%d", testset.results[i].nthreads);
490 fprintf(out,
"%d", testset.results[i].nthreads);
492 fprintf(out,
"}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
493 fprintf(out,
" title={%s}", plottitle);
494 fprintf(out,
" ]\n");
/* Series "prepsi (step 1b)". */
495 fprintf(out,
"\\addplot coordinates {");
496 for (i = 0; i < size; i++)
497 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg+testset.results[i].resval[2].avg);
498 fprintf(out,
"};\n");
/* Series "init nearfield (step 1c)". */
500 fprintf(out,
"\\addplot coordinates {");
501 for (i = 0; i < size; i++)
502 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
503 fprintf(out,
"};\n");
/* Series "far field (steps 2a-c)". */
505 fprintf(out,
"\\addplot coordinates {");
506 for (i = 0; i < size; i++)
507 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[4].avg + testset.results[i].resval[5].avg + testset.results[i].resval[6].avg);
508 fprintf(out,
"};\n");
/* Series "nearfield (step 2d)". */
510 fprintf(out,
"\\addplot coordinates {");
511 for (i = 0; i < size; i++)
512 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[7].avg);
513 fprintf(out,
"};\n");
/* Series "total - step 1a". */
515 fprintf(out,
"\\addplot coordinates {");
516 for (i = 0; i < size; i++)
517 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[9].avg - testset.results[i].resval[0].avg);
518 fprintf(out,
"};\n");
519 fprintf(out,
"\\legend{prepsi (step 1b),init nearfield (step 1c),far field (steps 2a-c),nearfield (step 2d),total $-$ step 1a}\n");
520 fprintf(out,
"\\end{axis}\n");
521 fprintf(out,
"\\end{tikzpicture}\n");
522 fprintf(out,
"\n\n");
/**
 * Writes a TikZ/pgfplots speedup plot to out, with one curve per testset.
 *
 * The timing used throughout is resval[9].avg - resval[0].avg. For each
 * testset the reference time tref is taken from the result measured with
 * nthreads == 1, and every result is plotted as tref divided by its own
 * timing. The same speedups are echoed on stderr.
 *
 * The title lists the parameters shared by all testsets (m is always left
 * out of the title); the legend entry of each testset lists the remaining
 * parameters, i.e. the differing ones plus m.
 *
 * @param out        stream receiving the TeX code
 * @param testsets   array of testsets to plot
 * @param ntestsets  number of entries in testsets
 */
527 void fastsum_print_output_speedup_total_minus_indep(FILE *out,
s_testset *testsets,
int ntestsets)
531 char plottitle[1025];
532 unsigned int diff_mask = fastsum_determine_different_parameters(testsets, ntestsets);
/* Fall back to a fixed name when the host name cannot be obtained. */
534 if (gethostname(hostname, 1024) != 0)
535 strncpy(hostname,
"unnamed", 1024);
/* Title: leave out the parameters that differ between testsets, and m. */
537 fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testsets[0].param, diff_mask | MASK_FSUM_WINM);
539 fprintf(out,
"\\begin{tikzpicture}\n");
540 fprintf(out,
"\\begin{axis}[");
541 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
542 fprintf(out,
" title={%s}", plottitle);
543 fprintf(out,
" ]\n");
/* NOTE(review): the local testset used below is presumably testsets[t];
 * its assignment and the initial value of tref are not visible here. */
545 for (t = 0; t < ntestsets; t++)
/* Look up the single-thread timing that serves as the speedup reference. */
550 for (i = 0; i < testset.nresults; i++)
551 if (testset.results[i].nthreads == 1)
552 tref = testset.results[i].resval[9].avg - testset.results[i].resval[0].avg;
554 fprintf(out,
"\\addplot coordinates {");
555 for (i = 0; i < testset.nresults; i++)
556 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, tref/(testset.results[i].resval[9].avg - testset.results[i].resval[0].avg));
557 fprintf(out,
"};\n");
/* Echo the same speedups on stderr. */
559 for (i = 0; i < testset.nresults; i++)
561 fprintf(stderr,
"%d:%.3f ", testset.results[i].nthreads, tref/(testset.results[i].resval[9].avg - testset.results[i].resval[0].avg));
563 fprintf(stderr,
"\n\n");
566 fprintf(out,
"\\legend{{");
567 for (t = 0; t < ntestsets; t++)
/* Legend entry: the complemented mask selects exactly the parameters that
 * were left out of the title; the host-name prefix is empty.
 * NOTE(review): the separator written between entries is not visible here. */
572 fastsum_get_plot_title_minus_indep(title, 255,
"", testsets[t].param, ~(diff_mask | MASK_FSUM_WINM));
573 fprintf(out,
"%s", title);
575 fprintf(out,
"}}\n");
576 fprintf(out,
"\\end{axis}\n");
577 fprintf(out,
"\\end{tikzpicture}\n");
578 fprintf(out,
"\n\n");
/**
 * Runs a complete testset: records the parameters in testset->param, creates
 * the input data set, and benchmarks the configuration once for every thread
 * count in nthreads_array.
 *
 * testset->results is allocated here with malloc (one s_result per thread
 * count); no matching free appears in the visible lines. kernel_name is
 * stored as a pointer, not copied.
 *
 * @param testset               testset to fill
 * @param d                     stored in param.d; passed to run_test_create
 * @param L                     stored in param.L; passed to run_test_create
 * @param M                     stored in param.M; passed to run_test_create
 * @param n                     stored in param.n; passed to run_test
 * @param m                     stored in param.m; passed to run_test
 * @param p                     stored in param.p; passed to run_test
 * @param kernel_name           stored in param.kernel_name; passed to run_test
 * @param c                     stored in param.c; passed to run_test
 * @param eps_I                 stored in param.eps_I; passed to run_test
 * @param eps_B                 stored in param.eps_B; passed to run_test
 * @param nthreads_array        thread counts to benchmark
 * @param n_threads_array_size  number of entries in nthreads_array
 */
583 void run_testset(
s_testset *testset,
int d,
int L,
int M,
int n,
int m,
int p,
char *kernel_name,
double c,
double eps_I,
double eps_B,
int *nthreads_array,
int n_threads_array_size)
586 testset->param.d = d;
587 testset->param.L = L;
588 testset->param.M = M;
589 testset->param.n = n;
590 testset->param.m = m;
591 testset->param.p = p;
592 testset->param.kernel_name = kernel_name;
593 testset->param.c = c;
594 testset->param.eps_I = eps_I;
595 testset->param.eps_B = eps_B;
597 testset->results = (
s_result*) malloc(n_threads_array_size*
sizeof(
s_result));
598 testset->nresults = n_threads_array_size;
/* The data set is created once, before the loop over thread counts. */
600 run_test_create(testset->param.d, testset->param.L, testset->param.M);
601 for (i = 0; i < n_threads_array_size; i++)
603 testset->results[i].nthreads = nthreads_array[i];
604 run_test(testset->results[i].resval, NREPEAT, testset->param.n, testset->param.m, testset->param.p, testset->param.kernel_name, testset->param.c, testset->param.eps_I, testset->param.eps_B, testset->results[i].nthreads);
/**
 * Benchmark scenario 1: runs a single testset and writes all four plots to
 * file_out_tex.
 *
 * The visible statements follow an #if that requires both MEASURE_TIME and
 * MEASURE_TIME_FFTW to be defined (the matching #endif is not visible in
 * this excerpt).
 *
 * @param nthreads_array        thread counts to benchmark
 * @param n_threads_array_size  number of entries in nthreads_array
 */
609 void test1(
int *nthreads_array,
int n_threads_array_size)
613 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
/* d=3, L=M=100000, n=128, m=4, p=7, kernel "one_over_x", c=0, eps_I=eps_B=0.03125. */
614 run_testset(&testsets[0], 3, 100000, 100000, 128, 4, 7,
"one_over_x", 0.0, 0.03125, 0.03125, nthreads_array, n_threads_array_size);
616 fastsum_print_output_speedup_total_minus_indep(file_out_tex, testsets, 1);
618 fastsum_print_output_histo_PreRfNfT(file_out_tex, testsets[0]);
620 nfft_adjoint_print_output_histo_DFBRT(file_out_tex, testsets[0]);
622 nfft_trafo_print_output_histo_DFBRT(file_out_tex, testsets[0]);
/**
 * Program entry point: determines the thread counts to benchmark, prints
 * them on stderr, opens the TeX output file, runs test1() and closes the
 * file again.
 *
 * When MEASURE_TIME and MEASURE_TIME_FFTW are not both defined, a warning
 * explaining how to reconfigure is printed on stderr.
 * NOTE(review): the end of that #if section (and whether the program exits
 * there) is not visible in this excerpt.
 *
 * @param argc  not used in the visible lines
 * @param argv  not used in the visible lines
 */
626 int main(
int argc,
char** argv)
629 int n_threads_array_size = get_nthreads_array(&nthreads_array);
632 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
633 fprintf(stderr,
"WARNING: Detailed time measurements are not activated.\n");
634 fprintf(stderr,
"Please re-run the configure script with options\n");
635 fprintf(stderr,
"--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
636 fprintf(stderr,
"and run \"make clean all\"\n\n");
/* List the thread counts that will be benchmarked. */
639 for (k = 0; k < n_threads_array_size; k++)
640 fprintf(stderr,
"%d ", nthreads_array[k]);
641 fprintf(stderr,
"\n");
/* NOTE(review): no check of the fopen result appears in the visible lines. */
643 file_out_tex = fopen(
"fastsum_benchomp_results_plots.tex",
"w");
645 test1(nthreads_array, n_threads_array_size);
647 fclose(file_out_tex);