| 1 | ! Conditional compilation : use scorep user task api |
|---|
| 2 | #ifdef CPP_SCOREP |
|---|
| 3 | |
|---|
| 4 | #include "scorep/SCOREP_User.inc" |
|---|
| 5 | #define SCOREP_DEFINE_HANDLE_ARRAY( handle ) SCOREP_USER_REGION_HANDLE, allocatable :: handle(:) |
|---|
| 6 | #define SCOREP_ALLOCATE_HANDLE_ARRAY( handle, size ) allocate( scorep_handle(size) ) |
|---|
| 7 | |
|---|
| 8 | #else |
|---|
| 9 | |
|---|
| 10 | #define SCOREP_DEFINE_HANDLE_ARRAY(handle) |
|---|
| 11 | #define SCOREP_ALLOCATE_HANDLE_ARRAY(handle,size) |
|---|
| 12 | #define SCOREP_USER_REGION_INIT(handle,name,region_type) |
|---|
| 13 | #define SCOREP_USER_REGION_ENTER(handle) |
|---|
| 14 | #define SCOREP_USER_REGION_END(handle) |
|---|
| 15 | |
|---|
| 16 | #endif |
|---|
| 17 | |
|---|
| 18 | MODULE profiling_physic_mod |
|---|
| 19 | use mod_phys_lmdz_omp_data, only : omp_size, omp_rank |
|---|
| 20 | use dict_int_mod |
|---|
| 21 | IMPLICIT NONE |
|---|
| 22 | SAVE |
|---|
| 23 | PRIVATE |
|---|
| 24 | |
|---|
| 25 | ! Shared variables |
|---|
| 26 | CHARACTER(*), parameter :: modname = "profiling_physic_mod" |
|---|
| 27 | LOGICAL, PARAMETER :: check_accuracy = .true. |
|---|
| 28 | INTEGER, PARAMETER :: max_id=50, max_depth=10 |
|---|
| 29 | INTEGER :: nb_id ! Number of timers |
|---|
| 30 | CHARACTER(15), DIMENSION(max_id) :: name ! Timer names |
|---|
| 31 | type(dict) :: ids |
|---|
| 32 | |
|---|
| 33 | ! Threadlocal variables |
|---|
| 34 | INTEGER, ALLOCATABLE :: depth(:)!(omp_size) ! Number of current nested timers |
|---|
| 35 | INTEGER, ALLOCATABLE :: current_id(:,:)!(omp_size, max_depth) ! Current nested timer ids |
|---|
| 36 | REAL, ALLOCATABLE :: chrono(:,:)!(omp_size, max_depth) ! Last timer start time |
|---|
| 37 | REAL, ALLOCATABLE :: elapsed(:,:)!(omp_size, max_id) ! Cumulative time for each timer |
|---|
| 38 | SCOREP_DEFINE_HANDLE_ARRAY(scorep_handle) |
|---|
| 39 | |
|---|
| 40 | LOGICAL :: profiling_enabled = .false. |
|---|
| 41 | |
|---|
| 42 | PUBLIC :: enter_profile, exit_profile, print_profile |
|---|
| 43 | |
|---|
| 44 | CONTAINS |
|---|
| 45 | ! Be sure to init mod_phys_lmdz_omp_data before profiling |
|---|
| 46 | SUBROUTINE init_profiling |
|---|
| 47 | use ioipsl_getin_p_mod, only : getin |
|---|
| 48 | call getin("phy_profiling_enable", profiling_enabled) |
|---|
| 49 | nb_id=0 |
|---|
| 50 | allocate(depth(0:omp_size-1)) |
|---|
| 51 | allocate(current_id(0:omp_size-1, max_depth)) |
|---|
| 52 | allocate(chrono(0:omp_size-1, max_depth)) |
|---|
| 53 | allocate(elapsed(0:omp_size-1, max_id)) |
|---|
| 54 | depth(:)=0 |
|---|
| 55 | SCOREP_ALLOCATE_HANDLE_ARRAY(scorep_handle, max_id) |
|---|
| 56 | call reset_profiling() |
|---|
| 57 | END SUBROUTINE init_profiling |
|---|
| 58 | |
|---|
| 59 | SUBROUTINE reset_profiling |
|---|
| 60 | if( any( depth /= 0 ) ) call abort_physic(modname, "Impossible to reset profiling : a profiling region is still open",1) |
|---|
| 61 | elapsed(:,:)=0 |
|---|
| 62 | END SUBROUTINE |
|---|
| 63 | |
|---|
| 64 | SUBROUTINE register_id(thename, id) |
|---|
| 65 | CHARACTER(*), INTENT(IN) :: thename |
|---|
| 66 | INTEGER, INTENT(OUT) :: id |
|---|
| 67 | nb_id = nb_id+1 |
|---|
| 68 | if(nb_id > max_id) call abort_physic(modname, "Too many timers",1) |
|---|
| 69 | id = nb_id |
|---|
| 70 | name(id)=thename |
|---|
| 71 | SCOREP_USER_REGION_INIT( scorep_handle(id), thename, SCOREP_USER_REGION_TYPE_COMMON ) |
|---|
| 72 | END SUBROUTINE register_id |
|---|
| 73 | |
|---|
| 74 | FUNCTION get_elapsed(start) |
|---|
| 75 | INTEGER(kind=8) :: count, count_rate |
|---|
| 76 | REAL :: start,get_elapsed |
|---|
| 77 | CALL SYSTEM_CLOCK(count,count_rate) |
|---|
| 78 | if(check_accuracy .and. count < 10) print *, "Warning : Profiling elapsed time too short to be accurately measured" |
|---|
| 79 | get_elapsed = (1.*count)/(1.*count_rate) - start |
|---|
| 80 | IF(get_elapsed<0.) get_elapsed=0. |
|---|
| 81 | END FUNCTION get_elapsed |
|---|
| 82 | |
|---|
| 83 | ! Enter profiling region with name 'name' |
|---|
| 84 | ! If 'name' doesn't exist, new id is created |
|---|
| 85 | ! Profiling region associated to name must be inactive (dosen't exist yet or 'exit_profile'd) |
|---|
| 86 | ! First call for 'name' must be called for every thread in the parallel region |
|---|
| 87 | SUBROUTINE enter_profile(name) |
|---|
| 88 | CHARACTER(*) :: name |
|---|
| 89 | integer :: id |
|---|
| 90 | logical, save :: first = .true. |
|---|
| 91 | |
|---|
| 92 | ! Initialize profiling at first call |
|---|
| 93 | if(first) then |
|---|
| 94 | call checked_omp_barrier("Timers init") |
|---|
| 95 | !$omp master |
|---|
| 96 | first = .false. |
|---|
| 97 | call init_profiling |
|---|
| 98 | !$omp end master |
|---|
| 99 | !$omp barrier |
|---|
| 100 | end if |
|---|
| 101 | if( profiling_enabled ) then |
|---|
| 102 | ! Create prof_id at first call with 'name' |
|---|
| 103 | if( .not. exists(ids, name) ) then |
|---|
| 104 | call checked_omp_barrier(name) |
|---|
| 105 | !$omp master |
|---|
| 106 | call register_id(name,id) |
|---|
| 107 | call insert_or_assign(ids, name, id) |
|---|
| 108 | !$omp end master |
|---|
| 109 | !$omp barrier |
|---|
| 110 | end if |
|---|
| 111 | call enter_profile_aux(get_val(ids, name)) |
|---|
| 112 | endif |
|---|
| 113 | END SUBROUTINE |
|---|
| 114 | |
|---|
| 115 | SUBROUTINE enter_profile_aux(id) |
|---|
| 116 | INTEGER, INTENT(IN) :: id |
|---|
| 117 | depth(omp_rank) = depth(omp_rank)+1 |
|---|
| 118 | chrono(omp_rank, depth(omp_rank)) = get_elapsed(0.) |
|---|
| 119 | current_id(omp_rank,depth(omp_rank)) = id |
|---|
| 120 | SCOREP_USER_REGION_ENTER(scorep_handle(id)) |
|---|
| 121 | END SUBROUTINE |
|---|
| 122 | |
|---|
| 123 | ! Exit profiling region with name 'name' |
|---|
| 124 | ! Region must exist and be active on current thread |
|---|
| 125 | SUBROUTINE exit_profile(name) |
|---|
| 126 | CHARACTER(*) :: name |
|---|
| 127 | if( profiling_enabled ) then |
|---|
| 128 | if(.not. exists(ids, name)) call abort_physic(modname, "Exit unknown timer", 1) |
|---|
| 129 | call exit_profile_aux(get_val(ids,name)) |
|---|
| 130 | endif |
|---|
| 131 | END SUBROUTINE |
|---|
| 132 | |
|---|
| 133 | SUBROUTINE exit_profile_aux(id) |
|---|
| 134 | INTEGER, INTENT(IN) :: id |
|---|
| 135 | INTEGER :: parent_id |
|---|
| 136 | REAL :: my_elapsed |
|---|
| 137 | |
|---|
| 138 | IF(depth(omp_rank)<=0) call abort_physic(modname, "exit_profile called without a matching enter_profile (depth=0)",1) |
|---|
| 139 | IF(id /= current_id(omp_rank, depth(omp_rank))) call abort_physic(modname, "wrong timer id : exit_profile id doesn't match enter_profile",1) |
|---|
| 140 | |
|---|
| 141 | SCOREP_USER_REGION_END(scorep_handle(id)) |
|---|
| 142 | |
|---|
| 143 | my_elapsed = get_elapsed(chrono(omp_rank, depth(omp_rank))) |
|---|
| 144 | ! add elapsed to current profile |
|---|
| 145 | elapsed(omp_rank,id) = elapsed(omp_rank,id) + my_elapsed |
|---|
| 146 | depth(omp_rank) = depth(omp_rank)-1 |
|---|
| 147 | ! and substract from parent profile |
|---|
| 148 | IF(depth(omp_rank)>0) THEN |
|---|
| 149 | parent_id = current_id(omp_rank,depth(omp_rank)) |
|---|
| 150 | elapsed(omp_rank,parent_id) = elapsed(omp_rank,parent_id) - my_elapsed |
|---|
| 151 | END IF |
|---|
| 152 | END SUBROUTINE |
|---|
| 153 | |
|---|
| 154 | ! Collective operation : must be called on every process/thread |
|---|
| 155 | SUBROUTINE print_profile |
|---|
| 156 | !use mpi_mod |
|---|
| 157 | !use mpipara |
|---|
| 158 | use mod_phys_lmdz_mpi_data, only : is_mpi_root, mpi_size |
|---|
| 159 | use mod_phys_lmdz_mpi_transfert , only : reduce_sum_mpi, reduce_min_mpi, reduce_max_mpi |
|---|
| 160 | INTEGER :: i |
|---|
| 161 | REAL :: mean_total, min_total, max_total, mean_total_local, min_total_local, max_total_local |
|---|
| 162 | REAL :: omp_mean_time_local, omp_min_time_local, omp_max_time_local |
|---|
| 163 | REAL :: mean_time, max_process, min_process, percent |
|---|
| 164 | |
|---|
| 165 | if( profiling_enabled ) then |
|---|
| 166 | call enter_profile("print_profile") |
|---|
| 167 | |
|---|
| 168 | !$OMP MASTER |
|---|
| 169 | ! mean_total : mean of sum of all timers on all threads and all procs |
|---|
| 170 | mean_total_local = SUM(elapsed(:,1:nb_id))/omp_size/mpi_size |
|---|
| 171 | !call MPI_Reduce(mean_total_local, mean_total, 1, MPI_DOUBLE, MPI_SUM, 0, comm_icosa, ierr) |
|---|
| 172 | call reduce_sum_mpi( mean_total_local, mean_total ) |
|---|
| 173 | ! min_total : min on all procs of sum of all timers on all threads |
|---|
| 174 | min_total_local = MINVAL( SUM(elapsed(:,1:nb_id), 2) ) |
|---|
| 175 | !call MPI_Reduce(min_total_local, min_total, 1, MPI_DOUBLE, MPI_MIN, 0, comm_icosa, ierr) |
|---|
| 176 | call reduce_min_mpi(min_total_local, min_total) |
|---|
| 177 | ! max_total : max on all procs of sum of all timers on all threads |
|---|
| 178 | max_total_local = MAXVAL( SUM(elapsed(:,1:nb_id), 2) ) |
|---|
| 179 | !call MPI_Reduce(max_total_local, max_total, 1, MPI_DOUBLE, MPI_MAX, 0, comm_icosa, ierr) |
|---|
| 180 | call reduce_max_mpi(max_total_local, max_total) |
|---|
| 181 | |
|---|
| 182 | if(is_mpi_root) PRINT *, '---------------------- Profiling (Physic) --------------' |
|---|
| 183 | if(is_mpi_root) PRINT ('(A15, F12.3, A, F12.3, A, F12.3, A)'), 'Total (Physic) (s) : ', mean_total, " [", min_total, ",", max_total, "]" |
|---|
| 184 | if(is_mpi_root) PRINT ('(A15, A7, A47, A47)'), " "," % "," Process mean(s)[ min, max, diff/mean(%) ] ", "Process 1 threads(s)[ min, max, diff/mean(%) ]" |
|---|
| 185 | DO i=1,nb_id |
|---|
| 186 | omp_mean_time_local = SUM(elapsed(:,i))/omp_size ! Mean time of timer on local threads |
|---|
| 187 | omp_min_time_local = MINVAL(elapsed(:,i)) ! Max time of timer on local threads |
|---|
| 188 | omp_max_time_local = MAXVAL(elapsed(:,i)) ! Min time of timer on local threads |
|---|
| 189 | !omp_inbalance_local = 100*(max_time-min_time)/min_time |
|---|
| 190 | |
|---|
| 191 | ! mean_time : mean of timer on all threads and all procs |
|---|
| 192 | !call MPI_Reduce(omp_mean_time_local, mean_time, 1, MPI_DOUBLE, MPI_SUM, 0, comm_icosa, ierr); |
|---|
| 193 | call reduce_sum_mpi( omp_mean_time_local, mean_time ) |
|---|
| 194 | mean_time = mean_time/mpi_size |
|---|
| 195 | ! max_process_time : max on procs of mean omp time |
|---|
| 196 | !call MPI_Reduce(omp_mean_time_local, max_process, 1, MPI_DOUBLE, MPI_MAX, 0, comm_icosa, ierr) |
|---|
| 197 | call reduce_max_mpi(omp_mean_time_local, max_process) |
|---|
| 198 | ! min_process_time : min on procs of mean omp time |
|---|
| 199 | !call MPI_Reduce(omp_mean_time_local, min_process, 1, MPI_DOUBLE, MPI_MIN, 0, comm_icosa, ierr) |
|---|
| 200 | call reduce_min_mpi(omp_mean_time_local, min_process) |
|---|
| 201 | |
|---|
| 202 | percent = 100*mean_time/mean_total |
|---|
| 203 | |
|---|
| 204 | if(is_mpi_root) PRINT ('(A15, F5.1, F12.3, A, F12.3, A, F12.3, A, F6.1, A, F12.3, A, F12.3, A, F12.3, A, F6.1, A)'), name(i), percent, mean_time, " [", min_process, ",", max_process, "," , inbalance(mean_time, min_process, max_process), "]", omp_mean_time_local, " [", omp_min_time_local, ",", omp_max_time_local, "," , inbalance(omp_mean_time_local, omp_min_time_local, omp_max_time_local) , "]" |
|---|
| 205 | END DO |
|---|
| 206 | if(is_mpi_root) PRINT *, '---------------------- Profiling -----------------------' |
|---|
| 207 | !$OMP END MASTER |
|---|
| 208 | call exit_profile("print_profile") |
|---|
| 209 | endif |
|---|
| 210 | contains |
|---|
| 211 | function inbalance( mean, min, max ) |
|---|
| 212 | real :: mean, min, max |
|---|
| 213 | real :: inbalance |
|---|
| 214 | inbalance = 100*(max-min)/(mean+1E-10) |
|---|
| 215 | end function |
|---|
| 216 | END SUBROUTINE print_profile |
|---|
| 217 | |
|---|
| 218 | #ifdef CPP_OMP |
|---|
| 219 | subroutine checked_omp_barrier(message) |
|---|
| 220 | use omp_lib, only : omp_get_team_size |
|---|
| 221 | character(*), intent(in) :: message |
|---|
| 222 | integer, save :: count |
|---|
| 223 | count = 0 |
|---|
| 224 | !$omp barrier |
|---|
| 225 | !$omp atomic update |
|---|
| 226 | count = count + 1 |
|---|
| 227 | !$omp barrier |
|---|
| 228 | if( count /= omp_get_team_size(1) ) then |
|---|
| 229 | print *, "Error in barrier in physic timers : ", trim(message) |
|---|
| 230 | print *, count,"/",omp_get_team_size(1), "threads participated" |
|---|
| 231 | call abort_physic(modname, "Timer barrier",1) |
|---|
| 232 | end if |
|---|
| 233 | end subroutine checked_omp_barrier |
|---|
| 234 | #else |
|---|
| 235 | subroutine checked_omp_barrier(message) |
|---|
| 236 | character(*), intent(in) :: message |
|---|
| 237 | end subroutine checked_omp_barrier |
|---|
| 238 | #endif |
|---|
| 239 | |
|---|
| 240 | END MODULE profiling_physic_mod |
|---|