source: LMDZ6/branches/Optimisation_LMDZ/libf/phy_common/profiling_physic.F90 @ 4026

Last change on this file since 4026 was 3760, checked in by adurocher, 4 years ago

Use blocks to allocate compressed arrays to the right size

File size: 9.7 KB
Line 
1! Conditional compilation : use scorep user task api
2#ifdef CPP_SCOREP
3
4#include "scorep/SCOREP_User.inc"
5#define SCOREP_DEFINE_HANDLE_ARRAY( handle ) SCOREP_USER_REGION_HANDLE, allocatable :: handle(:)
6#define SCOREP_ALLOCATE_HANDLE_ARRAY( handle, size ) allocate( scorep_handle(size) )
7
8#else
9
10#define SCOREP_DEFINE_HANDLE_ARRAY(handle)
11#define SCOREP_ALLOCATE_HANDLE_ARRAY(handle,size)
12#define SCOREP_USER_REGION_INIT(handle,name,region_type)
13#define SCOREP_USER_REGION_ENTER(handle)
14#define SCOREP_USER_REGION_END(handle)
15
16#endif
17
18MODULE profiling_physic_mod
19  use mod_phys_lmdz_omp_data, only : omp_size, omp_rank
20  use dict_int_mod
21  IMPLICIT NONE
22  SAVE
23  PRIVATE
24 
25  ! Shared variables
26  CHARACTER(*), parameter :: modname = "profiling_physic_mod"
27  LOGICAL, PARAMETER :: check_accuracy = .true.
28  INTEGER, PARAMETER :: max_id=50, max_depth=10
29  INTEGER :: nb_id ! Number of timers
30  CHARACTER(15), DIMENSION(max_id) :: name ! Timer names
31  type(dict) :: ids
32 
33  ! Threadlocal variables
34  INTEGER, ALLOCATABLE :: depth(:)!(omp_size) ! Number of current nested timers
35  INTEGER, ALLOCATABLE :: current_id(:,:)!(omp_size, max_depth) ! Current nested timer ids
36  REAL, ALLOCATABLE :: chrono(:,:)!(omp_size, max_depth) ! Last timer start time
37  REAL, ALLOCATABLE :: elapsed(:,:)!(omp_size, max_id) ! Cumulative time for each timer
38  SCOREP_DEFINE_HANDLE_ARRAY(scorep_handle)
39
40  LOGICAL :: profiling_enabled = .false.
41 
42  PUBLIC :: enter_profile, exit_profile, print_profile
43
44CONTAINS
45  ! Be sure to init mod_phys_lmdz_omp_data before profiling
46  SUBROUTINE init_profiling
47    use ioipsl_getin_p_mod, only : getin
48    call getin("phy_profiling_enable", profiling_enabled)
49    nb_id=0
50    allocate(depth(0:omp_size-1))
51    allocate(current_id(0:omp_size-1, max_depth))
52    allocate(chrono(0:omp_size-1, max_depth))
53    allocate(elapsed(0:omp_size-1, max_id))
54    depth(:)=0
55    SCOREP_ALLOCATE_HANDLE_ARRAY(scorep_handle, max_id)
56    call reset_profiling()
57  END SUBROUTINE init_profiling
58 
59  SUBROUTINE reset_profiling
60    if( any( depth /= 0 ) ) call abort_physic(modname, "Impossible to reset profiling : a profiling region is still open",1)   
61    elapsed(:,:)=0   
62  END SUBROUTINE
63
64  SUBROUTINE register_id(thename, id)
65    CHARACTER(*), INTENT(IN) :: thename
66    INTEGER, INTENT(OUT) :: id
67    nb_id = nb_id+1
68    if(nb_id > max_id) call abort_physic(modname, "Too many timers",1)
69    id = nb_id
70    name(id)=thename
71    SCOREP_USER_REGION_INIT( scorep_handle(id), thename, SCOREP_USER_REGION_TYPE_COMMON )
72  END SUBROUTINE register_id
73
74  FUNCTION get_elapsed(start)
75    INTEGER(kind=8) :: count, count_rate
76    REAL :: start,get_elapsed
77    CALL SYSTEM_CLOCK(count,count_rate)
78    if(check_accuracy .and. count < 10) print *, "Warning : Profiling elapsed time too short to be accurately measured"
79    get_elapsed = (1.*count)/(1.*count_rate) - start
80    IF(get_elapsed<0.) get_elapsed=0.
81  END FUNCTION get_elapsed
82
83  ! Enter profiling region with name 'name'
84  ! If 'name' doesn't exist, new id is created
85  ! Profiling region associated to name must be inactive (dosen't exist yet or 'exit_profile'd)
86  ! First call for 'name' must be called for every thread in the parallel region
87  SUBROUTINE enter_profile(name)
88    CHARACTER(*) :: name
89    integer :: id
90    logical, save :: first = .true.
91
92    ! Initialize profiling at first call
93    if(first) then
94      call checked_omp_barrier("Timers init")
95      !$omp master
96      first = .false.
97      call init_profiling
98      !$omp end master
99      !$omp barrier
100    end if
101    if( profiling_enabled ) then
102      ! Create prof_id at first call with 'name'
103      if( .not. exists(ids, name) ) then
104        call checked_omp_barrier(name)
105        !$omp master
106        call register_id(name,id)
107        call insert_or_assign(ids, name, id)
108        !$omp end master
109        !$omp barrier
110      end if
111      call enter_profile_aux(get_val(ids, name))
112    endif
113  END SUBROUTINE
114 
115  SUBROUTINE enter_profile_aux(id)
116    INTEGER, INTENT(IN) :: id
117    depth(omp_rank) = depth(omp_rank)+1
118    chrono(omp_rank, depth(omp_rank)) = get_elapsed(0.)
119    current_id(omp_rank,depth(omp_rank)) = id
120    SCOREP_USER_REGION_ENTER(scorep_handle(id))
121  END SUBROUTINE
122 
123  ! Exit profiling region with name 'name'
124  ! Region must exist and be active on current thread
125  SUBROUTINE exit_profile(name)
126    CHARACTER(*) :: name
127    if( profiling_enabled ) then   
128      if(.not. exists(ids, name)) call abort_physic(modname, "Exit unknown timer", 1)
129      call exit_profile_aux(get_val(ids,name))
130    endif
131  END SUBROUTINE
132   
133  SUBROUTINE exit_profile_aux(id)
134    INTEGER, INTENT(IN) :: id
135    INTEGER :: parent_id
136    REAL :: my_elapsed
137   
138    IF(depth(omp_rank)<=0) call abort_physic(modname, "exit_profile called without a matching enter_profile (depth=0)",1)
139    IF(id /= current_id(omp_rank, depth(omp_rank))) call abort_physic(modname, "wrong timer id : exit_profile id doesn't match enter_profile",1)
140   
141    SCOREP_USER_REGION_END(scorep_handle(id))
142   
143    my_elapsed = get_elapsed(chrono(omp_rank, depth(omp_rank)))
144    ! add elapsed to current profile
145    elapsed(omp_rank,id) = elapsed(omp_rank,id) + my_elapsed
146    depth(omp_rank) = depth(omp_rank)-1
147    ! and substract from parent profile
148    IF(depth(omp_rank)>0) THEN
149       parent_id = current_id(omp_rank,depth(omp_rank))
150       elapsed(omp_rank,parent_id) = elapsed(omp_rank,parent_id) - my_elapsed
151    END IF
152  END SUBROUTINE
153
154  ! Collective operation : must be called on every process/thread
155  SUBROUTINE print_profile
156    !use mpi_mod
157    !use mpipara
158    use mod_phys_lmdz_mpi_data, only : is_mpi_root, mpi_size
159    use mod_phys_lmdz_mpi_transfert , only : reduce_sum_mpi, reduce_min_mpi, reduce_max_mpi
160    INTEGER :: i
161    REAL :: mean_total, min_total, max_total, mean_total_local, min_total_local, max_total_local
162    REAL :: omp_mean_time_local, omp_min_time_local, omp_max_time_local
163    REAL :: mean_time, max_process, min_process, percent
164
165    if( profiling_enabled ) then
166      call enter_profile("print_profile")
167
168      !$OMP MASTER
169      ! mean_total : mean of sum of all timers on all threads and all procs
170      mean_total_local = SUM(elapsed(:,1:nb_id))/omp_size/mpi_size
171      !call MPI_Reduce(mean_total_local, mean_total, 1, MPI_DOUBLE, MPI_SUM, 0, comm_icosa, ierr)
172      call reduce_sum_mpi( mean_total_local, mean_total )
173      ! min_total : min on all procs of sum of all timers on all threads
174      min_total_local = MINVAL( SUM(elapsed(:,1:nb_id), 2) )
175      !call MPI_Reduce(min_total_local, min_total, 1, MPI_DOUBLE, MPI_MIN, 0, comm_icosa, ierr)
176      call reduce_min_mpi(min_total_local, min_total)
177      ! max_total : max on all procs of sum of all timers on all threads
178      max_total_local = MAXVAL( SUM(elapsed(:,1:nb_id), 2) )
179      !call MPI_Reduce(max_total_local, max_total, 1, MPI_DOUBLE, MPI_MAX, 0, comm_icosa, ierr)
180      call reduce_max_mpi(max_total_local, max_total)
181
182      if(is_mpi_root) PRINT *, '---------------------- Profiling (Physic) --------------'
183      if(is_mpi_root) PRINT ('(A15, F12.3, A, F12.3, A, F12.3, A)'), 'Total (Physic) (s) : ', mean_total, "  [", min_total, ",", max_total, "]"
184      if(is_mpi_root) PRINT ('(A15, A7, A47, A47)'), " ","  %  ","  Process mean(s)[ min, max, diff/mean(%) ]  ", "Process 1 threads(s)[ min, max, diff/mean(%) ]"
185      DO i=1,nb_id
186        omp_mean_time_local = SUM(elapsed(:,i))/omp_size ! Mean time of timer on local threads
187        omp_min_time_local = MINVAL(elapsed(:,i)) ! Max time of timer on local threads
188        omp_max_time_local = MAXVAL(elapsed(:,i)) ! Min time of timer on local threads
189        !omp_inbalance_local = 100*(max_time-min_time)/min_time
190       
191        ! mean_time : mean of timer on all threads and all procs
192        !call MPI_Reduce(omp_mean_time_local, mean_time, 1, MPI_DOUBLE, MPI_SUM, 0, comm_icosa, ierr);
193        call reduce_sum_mpi( omp_mean_time_local, mean_time )
194        mean_time = mean_time/mpi_size
195        ! max_process_time : max on procs of mean omp time
196        !call MPI_Reduce(omp_mean_time_local, max_process, 1, MPI_DOUBLE, MPI_MAX, 0, comm_icosa, ierr)
197        call reduce_max_mpi(omp_mean_time_local, max_process)
198        ! min_process_time : min on procs of mean omp time
199        !call MPI_Reduce(omp_mean_time_local, min_process, 1, MPI_DOUBLE, MPI_MIN, 0, comm_icosa, ierr)
200        call reduce_min_mpi(omp_mean_time_local, min_process)
201
202        percent = 100*mean_time/mean_total
203
204        if(is_mpi_root) PRINT ('(A15, F5.1, F12.3, A, F12.3, A, F12.3, A, F6.1, A, F12.3, A, F12.3, A, F12.3, A, F6.1, A)'), name(i), percent, mean_time, " [", min_process, ",", max_process, "," , inbalance(mean_time, min_process, max_process),  "]", omp_mean_time_local, " [", omp_min_time_local, ",", omp_max_time_local, "," , inbalance(omp_mean_time_local, omp_min_time_local, omp_max_time_local) , "]"
205      END DO
206      if(is_mpi_root) PRINT *, '---------------------- Profiling -----------------------'
207      !$OMP END MASTER
208      call exit_profile("print_profile")
209    endif
210  contains
211    function inbalance( mean, min, max )
212      real :: mean, min, max
213      real :: inbalance
214      inbalance = 100*(max-min)/(mean+1E-10)
215    end function
216  END SUBROUTINE print_profile
217
218#ifdef CPP_OMP
219  subroutine checked_omp_barrier(message)
220    use omp_lib, only : omp_get_team_size
221    character(*), intent(in) :: message
222    integer, save :: count
223    count = 0
224    !$omp barrier
225    !$omp atomic update
226    count = count + 1
227    !$omp barrier
228    if( count /= omp_get_team_size(1) ) then
229       print *, "Error in barrier in physic timers : ", trim(message)
230       print *, count,"/",omp_get_team_size(1), "threads participated"
231       call abort_physic(modname, "Timer barrier",1)
232     end if
233  end subroutine checked_omp_barrier
234#else
235  subroutine checked_omp_barrier(message)
236    character(*), intent(in) :: message
237  end subroutine checked_omp_barrier
238#endif
239   
240END MODULE profiling_physic_mod
Note: See TracBrowser for help on using the repository browser.