[4773] | 1 | #!/usr/bin/perl |
---|
| 2 | |
---|
| 3 | # |
---|
| 4 | # drhook_merge_walltime.pl |
---|
| 5 | # |
---|
| 6 | # For merging wall clock time results from different MPI-tasks |
---|
| 7 | # i.e. DR_HOOK_OPT=prof |
---|
| 8 | # |
---|
| 9 | # Original script by Eckhard Tschirschnitz, Cray, 2006 (Mflop/s) |
---|
| 10 | # |
---|
| 11 | # Usage: cat drhook.* | perl -w drhook_merge_walltime.pl |
---|
| 12 | # |
---|
| 13 | |
---|
| 14 | use strict; |
---|
| 15 | use warnings; |
---|
| 16 | |
---|
| 17 | # this expects concatenated dr_hook listings (wall clock time listings) |
---|
| 18 | |
---|
| 19 | my $bignum = 999999999; |
---|
| 20 | |
---|
| 21 | my $skip = 1; |
---|
| 22 | my $str = "THRESHOLD_PERCENT"; |
---|
| 23 | my $threshold_percent = exists($ENV{$str}) ? $ENV{$str} : 0.1; |
---|
| 24 | my $tottim = 0; |
---|
| 25 | |
---|
| 26 | my $maxwall = 0; |
---|
| 27 | my $minwall = $bignum; |
---|
| 28 | my $avgwall = 0; |
---|
| 29 | my $stdevwall = 0; |
---|
| 30 | |
---|
| 31 | my $nproc = 0; # no of MPI-tasks |
---|
| 32 | my $maxomp = 0; # max. no of OpenMP-threads encountered |
---|
| 33 | my $exe = ""; # the name of the executable |
---|
| 34 | |
---|
| 35 | my %nself = (); |
---|
| 36 | my %sumself = (); |
---|
| 37 | my %sum2self = (); |
---|
| 38 | my %maxself = (); |
---|
| 39 | my %minself = (); |
---|
| 40 | my %ompself = (); |
---|
| 41 | |
---|
| 42 | my %numcalls = (); |
---|
| 43 | |
---|
| 44 | my %taskhits = (); |
---|
| 45 | my %omphits = (); |
---|
| 46 | |
---|
| 47 | RECORD: for (<>) { |
---|
| 48 | chomp; # get rid of the newline character |
---|
| 49 | |
---|
| 50 | next RECORD if (m/^\s*$/); # a blank line |
---|
| 51 | |
---|
| 52 | if (m/^\s*Profiling\b/) { |
---|
| 53 | if ($nproc == 0) { |
---|
| 54 | $exe = $1 if (m/program='([^\'].*)'/); |
---|
| 55 | } |
---|
| 56 | $nproc++; |
---|
| 57 | $skip = 1; |
---|
| 58 | } |
---|
| 59 | elsif (m/^\s+Wall-time\s+is\s+(\S+)\s+/) { |
---|
| 60 | my $value = $1; |
---|
| 61 | $maxwall = $value if ($value > $maxwall); |
---|
| 62 | $minwall = $value if ($value < $minwall); |
---|
| 63 | $avgwall += $value; |
---|
| 64 | $stdevwall += $value * $value; |
---|
| 65 | $skip = 1; |
---|
| 66 | } |
---|
| 67 | elsif (m/^\s+1\s+/) { # the first record of (any) file encountered |
---|
| 68 | &CumulativeUpdate(); |
---|
| 69 | %ompself = (); |
---|
| 70 | $skip = 0; |
---|
| 71 | } |
---|
| 72 | |
---|
| 73 | next RECORD if ($skip); |
---|
| 74 | |
---|
| 75 | # rank %time cumul self total #_of_calls self:ms/call tot:ms/call routine_name |
---|
| 76 | if (m/^\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+(\S+)\s+\S+\s+\S+\s+(.*)\s*/) { |
---|
| 77 | my $self = $1; |
---|
| 78 | my $ncalls = $2; |
---|
| 79 | my $name = $3; |
---|
| 80 | my $tid = 0; |
---|
| 81 | $name =~ s/\s+//g; |
---|
| 82 | $name =~ s/^[*]//; |
---|
| 83 | #print "$self $name\n"; |
---|
| 84 | if ($name =~ m/^(.*)[@](\d+)/) { |
---|
| 85 | $tid = $2; |
---|
| 86 | $maxomp = $tid if ($tid > $maxomp); |
---|
| 87 | $name = $1; |
---|
| 88 | } |
---|
| 89 | unless (exists($sumself{$name})) { |
---|
| 90 | $nself{$name} = 0; |
---|
| 91 | $sumself{$name} = 0; |
---|
| 92 | $sum2self{$name} = 0; |
---|
| 93 | $maxself{$name} = 0; |
---|
| 94 | $minself{$name} = $bignum; |
---|
| 95 | $numcalls{$name} = 0; |
---|
| 96 | $taskhits{$name} = 0; |
---|
| 97 | $omphits{$name} = 0; |
---|
| 98 | } |
---|
| 99 | $numcalls{$name} += $ncalls; |
---|
| 100 | $taskhits{$name} += 1 if ($tid == 1); |
---|
| 101 | $omphits{$name} += 1; |
---|
| 102 | |
---|
| 103 | # Account the most expensive OpenMP thread only per $name label |
---|
| 104 | $ompself{$name} = 0 unless (exists($ompself{$name})); |
---|
| 105 | $ompself{$name} = $self if ($self > $ompself{$name}); |
---|
| 106 | } |
---|
| 107 | } |
---|
| 108 | |
---|
| 109 | if ($nproc > 0) { |
---|
| 110 | # One final time ... |
---|
| 111 | &CumulativeUpdate(); |
---|
| 112 | |
---|
| 113 | print STDOUT "The name of the executable : $exe\n"; |
---|
| 114 | print STDOUT "Number of MPI-tasks : $nproc\n"; |
---|
| 115 | print STDOUT "Number of OpenMP-threads : $maxomp\n"; |
---|
| 116 | print STDOUT "export $str : $threshold_percent%\n"; |
---|
| 117 | $avgwall /= $nproc; |
---|
| 118 | if ($nproc > 1) { |
---|
| 119 | $stdevwall = ($stdevwall - $nproc * $avgwall * $avgwall)/($nproc - 1); |
---|
| 120 | $stdevwall = ($stdevwall > 0) ? sqrt($stdevwall) : 0; # be careful with rounding of errors |
---|
| 121 | } |
---|
| 122 | else { |
---|
| 123 | $stdevwall = 0; |
---|
| 124 | } |
---|
| 125 | #printf STDOUT ("Total time (average) : %.3f secs \n", $avgwall); |
---|
| 126 | printf STDOUT ("Wall-times over %d MPI-tasks (secs) : Min=%.3f, Max=%.3f, Avg=%.3f, StDev=%.3f\n", |
---|
| 127 | $nproc, $minwall, $maxwall, $avgwall, $stdevwall); |
---|
| 128 | my $avgpercent = $threshold_percent * $avgwall / 100.0; |
---|
| 129 | |
---|
| 130 | printf STDOUT ("Routines whose average time > %.2f%% (%.3f secs) of the total average will be included in the listing\n", |
---|
| 131 | $threshold_percent,$avgpercent); |
---|
| 132 | |
---|
| 133 | printf STDOUT ("%7s %10s %10s %10s %8s %8s %12s %13s %8s %8s : %s\n", |
---|
| 134 | "Avg-%", "Avg.secs", |
---|
| 135 | "Min.secs", "Max.secs", "St.dev", |
---|
| 136 | "Imbal-%", |
---|
| 137 | "# of calls", |
---|
| 138 | "Avg.msec/call", |
---|
| 139 | "Tasks", |
---|
| 140 | "OpenMP", |
---|
| 141 | "Name-of-the-routine"); |
---|
| 142 | |
---|
| 143 | open(PIPE,"|sort -nr|sed 's/ %/%/'"); |
---|
| 144 | |
---|
| 145 | # Values accounted for |
---|
| 146 | my $acc_avgpercent = 0; |
---|
| 147 | my $acc_avgtime = 0; |
---|
| 148 | my $acc_maxtime = 0; |
---|
| 149 | my $acc_mintime = $bignum; |
---|
| 150 | |
---|
| 151 | foreach my $name (keys %sumself) { |
---|
| 152 | my $value = $sumself{$name}; |
---|
| 153 | my $navg = $nself{$name}; |
---|
| 154 | my $avgself = $value/$navg; |
---|
| 155 | if ($avgself > $avgpercent) { |
---|
| 156 | my $percent = $value/$tottim*100; |
---|
| 157 | my $stdev = 0; |
---|
| 158 | if ($navg > 1) { |
---|
| 159 | $stdev = $sum2self{$name}; |
---|
| 160 | $stdev = ($stdev - $navg * $avgself * $avgself)/($navg - 1); |
---|
| 161 | $stdev = ($stdev > 0) ? sqrt($stdev) : 0; # be careful with rounding of errors |
---|
| 162 | } |
---|
| 163 | printf PIPE ("%6.2f %% %10.3f %10.3f %10.3f %8.3f %7.2f%% %12.0f %13.3f %8d %8d : %s\n", |
---|
| 164 | $percent, $avgself, |
---|
| 165 | $minself{$name},$maxself{$name},$stdev, |
---|
| 166 | ($maxself{$name} - $minself{$name})/$maxself{$name}*100, |
---|
| 167 | $numcalls{$name}, |
---|
| 168 | $avgself / $numcalls{$name} * 1000.0, |
---|
| 169 | $taskhits{$name}, |
---|
| 170 | $omphits{$name} / $taskhits{$name}, |
---|
| 171 | $name); |
---|
| 172 | # Update values accounted for |
---|
| 173 | $acc_avgpercent += $percent; |
---|
| 174 | $acc_avgtime += $avgself; |
---|
| 175 | $acc_mintime = $minself{$name} if ($acc_mintime > $minself{$name}); |
---|
| 176 | $acc_maxtime = $maxself{$name} if ($acc_maxtime < $maxself{$name}); |
---|
| 177 | } |
---|
| 178 | } |
---|
| 179 | close(PIPE); |
---|
| 180 | printf STDOUT ("%6.2f%% %10.3f %10.3f %10.3f\n", |
---|
| 181 | $acc_avgpercent,$acc_avgtime, |
---|
| 182 | $acc_mintime, $acc_maxtime); |
---|
| 183 | } |
---|
| 184 | |
---|
| 185 | exit 0; |
---|
| 186 | |
---|
| 187 | sub CumulativeUpdate { |
---|
| 188 | foreach my $name (keys %ompself) { |
---|
| 189 | my $self = $ompself{$name}; |
---|
| 190 | $nself{$name} += 1; |
---|
| 191 | $sumself{$name} += $self; |
---|
| 192 | $sum2self{$name} += $self * $self; |
---|
| 193 | $maxself{$name} = $self if ($self > $maxself{$name}); |
---|
| 194 | $minself{$name} = $self if ($self < $minself{$name}); |
---|
| 195 | $tottim += $self; |
---|
| 196 | } |
---|
| 197 | } |
---|
| 198 | |
---|
| 199 | # A sample of typical output |
---|
| 200 | __DATA__ |
---|
| 201 | The name of the executable : /lustre/tmp/saarines/rundir/LUMO/19/fmiopt/teho.gnu/h1/N7+1.T140+20xt1xh1.dcmp_10x14.ppn_20+20.nproma_60.hgp_undef.fmiopt-534667/MASTERODB |
---|
| 202 | Number of MPI-tasks : 140 |
---|
| 203 | Number of OpenMP-threads : 1 |
---|
| 204 | export THRESHOLD_PERCENT : 0.2% |
---|
| 205 | Wall-times over 140 MPI-tasks (secs) : Min=399.410, Max=400.320, Avg=399.876, StDev=0.220 |
---|
| 206 | Routines whose average time > 0.20% (0.800 secs) of the total average will be included in the listing |
---|
| 207 | Avg-% Avg.secs Min.secs Max.secs St.dev Imbal-% # of calls Avg.msec/call Tasks OpenMP : Name-of-the-routine |
---|
| 208 | 5.32% 21.270 19.341 24.105 0.920 19.76% 10598700 0.002 140 1 : LAITRI |
---|
| 209 | 5.18% 20.730 16.166 23.143 1.239 30.15% 698740 0.030 140 1 : VDFHGHTNHL |
---|
| 210 | 5.06% 20.215 0.469 70.180 14.105 99.33% 8540 2.367 140 1 : SLCOMM |
---|
| 211 | 4.12% 16.474 12.309 17.213 0.952 28.49% 933656 0.018 140 1 : FPCINCAPE |
---|
| 212 | 3.61% 14.429 11.903 16.617 0.774 28.37% 698740 0.021 140 1 : INITAPLPAR |
---|
| 213 | 3.02% 12.055 0.671 45.288 10.498 98.52% 6860 1.757 140 1 : SLCOMM2A |
---|
| 214 | 2.72% 10.875 5.275 16.432 2.938 67.90% 698740 0.016 140 1 : RAIN_ICE |
---|
| 215 | 2.70% 10.793 4.965 18.614 3.433 73.33% 698740 0.015 140 1 : RAIN_ICE:RAIN_ICE_SEDIMENTATION_STAT |
---|
| 216 | 2.55% 10.182 9.763 10.552 0.212 7.48% 25340 0.402 140 1 : TRMTOL |
---|
| 217 | 2.54% 10.169 8.510 10.509 0.444 19.02% 698740 0.015 140 1 : APL_AROME |
---|
| 218 | 2.51% 10.054 8.519 10.192 0.421 16.41% 1397480 0.007 140 1 : COMPUTE_FUNCTION_THERMO_MF |
---|
| 219 | 2.51% 10.038 8.181 11.849 1.156 30.96% 25340 0.396 140 1 : TRLTOG |
---|
| 220 | 2.31% 9.252 5.219 21.083 4.795 75.25% 21000 0.441 140 1 : TRLTOM |
---|
| 221 | 1.93% 7.711 1.976 24.881 6.385 92.06% 22540 0.342 140 1 : TRGTOL |
---|
| 222 | 1.91% 7.653 3.985 12.186 2.001 67.30% 698740 0.011 140 1 : RAIN_ICE:RAIN_ICE_SLOW |
---|
| 223 | 1.84% 7.364 6.510 7.976 0.403 18.38% 1413160 0.005 140 1 : ELARCHE |
---|
| 224 | 1.46% 5.825 3.269 8.145 1.265 59.86% 71300 0.082 140 1 : RRTM_RTRN1A_140GP |
---|
| 225 | 1.33% 5.301 4.389 5.637 0.229 22.14% 5589920 0.001 140 1 : TRIDIAG_MASSFLUX |
---|
| 226 | 1.30% 5.208 4.416 5.282 0.218 16.40% 1397480 0.004 140 1 : TURB:COMPUTE_FUNCTION_THERMO |
---|
| 227 | 1.25% 4.997 4.445 5.741 0.406 22.57% 9260880 0.001 140 1 : RPASSF |
---|
| 228 | 1.12% 4.489 3.801 4.562 0.189 16.68% 698740 0.006 140 1 : ACTQSAT |
---|
| 229 | 0.93% 3.699 2.879 4.401 0.341 34.58% 698740 0.005 140 1 : CONDENSATION |
---|
| 230 | 0.90% 3.588 0.507 9.260 1.812 94.52% 698740 0.005 140 1 : RAIN_ICE:RAIN_ICE_FAST_RG |
---|
| 231 | |
---|
| 232 | etc. |
---|
| 233 | |
---|
| 234 | 0.23% 0.922 0.891 0.978 0.025 8.90% 718900 0.001 140 1 : SIPTP |
---|
| 235 | 0.21% 0.840 0.583 1.262 0.160 53.80% 13440 0.063 140 1 : ESEIMPLS |
---|
| 236 | 0.21% 0.834 0.691 0.878 0.037 21.30% 698740 0.001 140 1 : LATTEX |
---|
| 237 | 0.21% 0.825 0.698 0.845 0.035 17.40% 112297500 0.000 140 1 : SWTT1 |
---|
| 238 | 0.18% 0.893 0.000 1.548 0.567 100.00% 477554 0.002 113 1 : ECUME_FLUX |
---|
| 239 | 85.41% 341.669 0.000 70.180 |
---|