1 | #!/usr/bin/perl |
---|
2 | |
---|
3 | # |
---|
4 | # drhook_merge_walltime.pl |
---|
5 | # |
---|
6 | # For merging wall clock time results from different MPI-tasks |
---|
7 | # i.e. DR_HOOK_OPT=prof |
---|
8 | # |
---|
9 | # Original script by Eckhard Tschirschnitz, Cray, 2006 (Mflop/s) |
---|
10 | # |
---|
11 | # Usage: cat drhook.* | perl -w drhook_merge_walltime.pl |
---|
12 | # |
---|
13 | |
---|
14 | use strict; |
---|
15 | use warnings; |
---|
16 | |
---|
17 | # this expects concatenated dr_hook listings (wall clock time listings) |
---|
18 | |
---|
19 | my $bignum = 999999999; |
---|
20 | |
---|
21 | my $skip = 1; |
---|
22 | my $str = "THRESHOLD_PERCENT"; |
---|
23 | my $threshold_percent = exists($ENV{$str}) ? $ENV{$str} : 0.1; |
---|
24 | my $tottim = 0; |
---|
25 | |
---|
26 | my $maxwall = 0; |
---|
27 | my $minwall = $bignum; |
---|
28 | my $avgwall = 0; |
---|
29 | my $stdevwall = 0; |
---|
30 | |
---|
31 | my $nproc = 0; # no of MPI-tasks |
---|
32 | my $maxomp = 0; # max. no of OpenMP-threads encountered |
---|
33 | my $exe = ""; # the name of the executable |
---|
34 | |
---|
35 | my %nself = (); |
---|
36 | my %sumself = (); |
---|
37 | my %sum2self = (); |
---|
38 | my %maxself = (); |
---|
39 | my %minself = (); |
---|
40 | my %ompself = (); |
---|
41 | |
---|
42 | my %numcalls = (); |
---|
43 | |
---|
44 | my %taskhits = (); |
---|
45 | my %omphits = (); |
---|
46 | |
---|
47 | RECORD: for (<>) { |
---|
48 | chomp; # get rid of the newline character |
---|
49 | |
---|
50 | next RECORD if (m/^\s*$/); # a blank line |
---|
51 | |
---|
52 | if (m/^\s*Profiling\b/) { |
---|
53 | if ($nproc == 0) { |
---|
54 | $exe = $1 if (m/program='([^\'].*)'/); |
---|
55 | } |
---|
56 | $nproc++; |
---|
57 | $skip = 1; |
---|
58 | } |
---|
59 | elsif (m/^\s+Wall-time\s+is\s+(\S+)\s+/) { |
---|
60 | my $value = $1; |
---|
61 | $maxwall = $value if ($value > $maxwall); |
---|
62 | $minwall = $value if ($value < $minwall); |
---|
63 | $avgwall += $value; |
---|
64 | $stdevwall += $value * $value; |
---|
65 | $skip = 1; |
---|
66 | } |
---|
67 | elsif (m/^\s+1\s+/) { # the first record of (any) file encountered |
---|
68 | &CumulativeUpdate(); |
---|
69 | %ompself = (); |
---|
70 | $skip = 0; |
---|
71 | } |
---|
72 | |
---|
73 | next RECORD if ($skip); |
---|
74 | |
---|
75 | # rank %time cumul self total #_of_calls self:ms/call tot:ms/call routine_name |
---|
76 | if (m/^\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+(\S+)\s+\S+\s+\S+\s+(.*)\s*/) { |
---|
77 | my $self = $1; |
---|
78 | my $ncalls = $2; |
---|
79 | my $name = $3; |
---|
80 | my $tid = 0; |
---|
81 | $name =~ s/\s+//g; |
---|
82 | $name =~ s/^[*]//; |
---|
83 | #print "$self $name\n"; |
---|
84 | if ($name =~ m/^(.*)[@](\d+)/) { |
---|
85 | $tid = $2; |
---|
86 | $maxomp = $tid if ($tid > $maxomp); |
---|
87 | $name = $1; |
---|
88 | } |
---|
89 | unless (exists($sumself{$name})) { |
---|
90 | $nself{$name} = 0; |
---|
91 | $sumself{$name} = 0; |
---|
92 | $sum2self{$name} = 0; |
---|
93 | $maxself{$name} = 0; |
---|
94 | $minself{$name} = $bignum; |
---|
95 | $numcalls{$name} = 0; |
---|
96 | $taskhits{$name} = 0; |
---|
97 | $omphits{$name} = 0; |
---|
98 | } |
---|
99 | $numcalls{$name} += $ncalls; |
---|
100 | $taskhits{$name} += 1 if ($tid == 1); |
---|
101 | $omphits{$name} += 1; |
---|
102 | |
---|
103 | # Account the most expensive OpenMP thread only per $name label |
---|
104 | $ompself{$name} = 0 unless (exists($ompself{$name})); |
---|
105 | $ompself{$name} = $self if ($self > $ompself{$name}); |
---|
106 | } |
---|
107 | } |
---|
108 | |
---|
109 | if ($nproc > 0) { |
---|
110 | # One final time ... |
---|
111 | &CumulativeUpdate(); |
---|
112 | |
---|
113 | print STDOUT "The name of the executable : $exe\n"; |
---|
114 | print STDOUT "Number of MPI-tasks : $nproc\n"; |
---|
115 | print STDOUT "Number of OpenMP-threads : $maxomp\n"; |
---|
116 | print STDOUT "export $str : $threshold_percent%\n"; |
---|
117 | $avgwall /= $nproc; |
---|
118 | if ($nproc > 1) { |
---|
119 | $stdevwall = ($stdevwall - $nproc * $avgwall * $avgwall)/($nproc - 1); |
---|
120 | $stdevwall = ($stdevwall > 0) ? sqrt($stdevwall) : 0; # be careful with rounding of errors |
---|
121 | } |
---|
122 | else { |
---|
123 | $stdevwall = 0; |
---|
124 | } |
---|
125 | #printf STDOUT ("Total time (average) : %.3f secs \n", $avgwall); |
---|
126 | printf STDOUT ("Wall-times over %d MPI-tasks (secs) : Min=%.3f, Max=%.3f, Avg=%.3f, StDev=%.3f\n", |
---|
127 | $nproc, $minwall, $maxwall, $avgwall, $stdevwall); |
---|
128 | my $avgpercent = $threshold_percent * $avgwall / 100.0; |
---|
129 | |
---|
130 | printf STDOUT ("Routines whose average time > %.2f%% (%.3f secs) of the total average will be included in the listing\n", |
---|
131 | $threshold_percent,$avgpercent); |
---|
132 | |
---|
133 | printf STDOUT ("%7s %10s %10s %10s %8s %8s %12s %13s %8s %8s : %s\n", |
---|
134 | "Avg-%", "Avg.secs", |
---|
135 | "Min.secs", "Max.secs", "St.dev", |
---|
136 | "Imbal-%", |
---|
137 | "# of calls", |
---|
138 | "Avg.msec/call", |
---|
139 | "Tasks", |
---|
140 | "OpenMP", |
---|
141 | "Name-of-the-routine"); |
---|
142 | |
---|
143 | open(PIPE,"|sort -nr|sed 's/ %/%/'"); |
---|
144 | |
---|
145 | # Values accounted for |
---|
146 | my $acc_avgpercent = 0; |
---|
147 | my $acc_avgtime = 0; |
---|
148 | my $acc_maxtime = 0; |
---|
149 | my $acc_mintime = $bignum; |
---|
150 | |
---|
151 | foreach my $name (keys %sumself) { |
---|
152 | my $value = $sumself{$name}; |
---|
153 | my $navg = $nself{$name}; |
---|
154 | my $avgself = $value/$navg; |
---|
155 | if ($avgself > $avgpercent) { |
---|
156 | my $percent = $value/$tottim*100; |
---|
157 | my $stdev = 0; |
---|
158 | if ($navg > 1) { |
---|
159 | $stdev = $sum2self{$name}; |
---|
160 | $stdev = ($stdev - $navg * $avgself * $avgself)/($navg - 1); |
---|
161 | $stdev = ($stdev > 0) ? sqrt($stdev) : 0; # be careful with rounding of errors |
---|
162 | } |
---|
163 | printf PIPE ("%6.2f %% %10.3f %10.3f %10.3f %8.3f %7.2f%% %12.0f %13.3f %8d %8d : %s\n", |
---|
164 | $percent, $avgself, |
---|
165 | $minself{$name},$maxself{$name},$stdev, |
---|
166 | ($maxself{$name} - $minself{$name})/$maxself{$name}*100, |
---|
167 | $numcalls{$name}, |
---|
168 | $avgself / $numcalls{$name} * 1000.0, |
---|
169 | $taskhits{$name}, |
---|
170 | $omphits{$name} / $taskhits{$name}, |
---|
171 | $name); |
---|
172 | # Update values accounted for |
---|
173 | $acc_avgpercent += $percent; |
---|
174 | $acc_avgtime += $avgself; |
---|
175 | $acc_mintime = $minself{$name} if ($acc_mintime > $minself{$name}); |
---|
176 | $acc_maxtime = $maxself{$name} if ($acc_maxtime < $maxself{$name}); |
---|
177 | } |
---|
178 | } |
---|
179 | close(PIPE); |
---|
180 | printf STDOUT ("%6.2f%% %10.3f %10.3f %10.3f\n", |
---|
181 | $acc_avgpercent,$acc_avgtime, |
---|
182 | $acc_mintime, $acc_maxtime); |
---|
183 | } |
---|
184 | |
---|
185 | exit 0; |
---|
186 | |
---|
187 | sub CumulativeUpdate { |
---|
188 | foreach my $name (keys %ompself) { |
---|
189 | my $self = $ompself{$name}; |
---|
190 | $nself{$name} += 1; |
---|
191 | $sumself{$name} += $self; |
---|
192 | $sum2self{$name} += $self * $self; |
---|
193 | $maxself{$name} = $self if ($self > $maxself{$name}); |
---|
194 | $minself{$name} = $self if ($self < $minself{$name}); |
---|
195 | $tottim += $self; |
---|
196 | } |
---|
197 | } |
---|
198 | |
---|
199 | # A sample of typical output |
---|
200 | __DATA__ |
---|
201 | The name of the executable : /lustre/tmp/saarines/rundir/LUMO/19/fmiopt/teho.gnu/h1/N7+1.T140+20xt1xh1.dcmp_10x14.ppn_20+20.nproma_60.hgp_undef.fmiopt-534667/MASTERODB |
---|
202 | Number of MPI-tasks : 140 |
---|
203 | Number of OpenMP-threads : 1 |
---|
204 | export THRESHOLD_PERCENT : 0.2% |
---|
205 | Wall-times over 140 MPI-tasks (secs) : Min=399.410, Max=400.320, Avg=399.876, StDev=0.220 |
---|
206 | Routines whose average time > 0.20% (0.800 secs) of the total average will be included in the listing |
---|
207 | Avg-% Avg.secs Min.secs Max.secs St.dev Imbal-% # of calls Avg.msec/call Tasks OpenMP : Name-of-the-routine |
---|
208 | 5.32% 21.270 19.341 24.105 0.920 19.76% 10598700 0.002 140 1 : LAITRI |
---|
209 | 5.18% 20.730 16.166 23.143 1.239 30.15% 698740 0.030 140 1 : VDFHGHTNHL |
---|
210 | 5.06% 20.215 0.469 70.180 14.105 99.33% 8540 2.367 140 1 : SLCOMM |
---|
211 | 4.12% 16.474 12.309 17.213 0.952 28.49% 933656 0.018 140 1 : FPCINCAPE |
---|
212 | 3.61% 14.429 11.903 16.617 0.774 28.37% 698740 0.021 140 1 : INITAPLPAR |
---|
213 | 3.02% 12.055 0.671 45.288 10.498 98.52% 6860 1.757 140 1 : SLCOMM2A |
---|
214 | 2.72% 10.875 5.275 16.432 2.938 67.90% 698740 0.016 140 1 : RAIN_ICE |
---|
215 | 2.70% 10.793 4.965 18.614 3.433 73.33% 698740 0.015 140 1 : RAIN_ICE:RAIN_ICE_SEDIMENTATION_STAT |
---|
216 | 2.55% 10.182 9.763 10.552 0.212 7.48% 25340 0.402 140 1 : TRMTOL |
---|
217 | 2.54% 10.169 8.510 10.509 0.444 19.02% 698740 0.015 140 1 : APL_AROME |
---|
218 | 2.51% 10.054 8.519 10.192 0.421 16.41% 1397480 0.007 140 1 : COMPUTE_FUNCTION_THERMO_MF |
---|
219 | 2.51% 10.038 8.181 11.849 1.156 30.96% 25340 0.396 140 1 : TRLTOG |
---|
220 | 2.31% 9.252 5.219 21.083 4.795 75.25% 21000 0.441 140 1 : TRLTOM |
---|
221 | 1.93% 7.711 1.976 24.881 6.385 92.06% 22540 0.342 140 1 : TRGTOL |
---|
222 | 1.91% 7.653 3.985 12.186 2.001 67.30% 698740 0.011 140 1 : RAIN_ICE:RAIN_ICE_SLOW |
---|
223 | 1.84% 7.364 6.510 7.976 0.403 18.38% 1413160 0.005 140 1 : ELARCHE |
---|
224 | 1.46% 5.825 3.269 8.145 1.265 59.86% 71300 0.082 140 1 : RRTM_RTRN1A_140GP |
---|
225 | 1.33% 5.301 4.389 5.637 0.229 22.14% 5589920 0.001 140 1 : TRIDIAG_MASSFLUX |
---|
226 | 1.30% 5.208 4.416 5.282 0.218 16.40% 1397480 0.004 140 1 : TURB:COMPUTE_FUNCTION_THERMO |
---|
227 | 1.25% 4.997 4.445 5.741 0.406 22.57% 9260880 0.001 140 1 : RPASSF |
---|
228 | 1.12% 4.489 3.801 4.562 0.189 16.68% 698740 0.006 140 1 : ACTQSAT |
---|
229 | 0.93% 3.699 2.879 4.401 0.341 34.58% 698740 0.005 140 1 : CONDENSATION |
---|
230 | 0.90% 3.588 0.507 9.260 1.812 94.52% 698740 0.005 140 1 : RAIN_ICE:RAIN_ICE_FAST_RG |
---|
231 | |
---|
232 | etc. |
---|
233 | |
---|
234 | 0.23% 0.922 0.891 0.978 0.025 8.90% 718900 0.001 140 1 : SIPTP |
---|
235 | 0.21% 0.840 0.583 1.262 0.160 53.80% 13440 0.063 140 1 : ESEIMPLS |
---|
236 | 0.21% 0.834 0.691 0.878 0.037 21.30% 698740 0.001 140 1 : LATTEX |
---|
237 | 0.21% 0.825 0.698 0.845 0.035 17.40% 112297500 0.000 140 1 : SWTT1 |
---|
238 | 0.18% 0.893 0.000 1.548 0.567 100.00% 477554 0.002 113 1 : ECUME_FLUX |
---|
239 | 85.41% 341.669 0.000 70.180 |
---|