! $Id$

!> 1-D MPI domain decomposition (latitude bands) for the LMDZ dynamics,
!> combined with OpenMP parallelism in the vertical.
!> Each MPI rank owns a contiguous range of latitude rows [jj_begin, jj_end];
!> this module builds that distribution, exposes halo-extended bounds
!> (*_u on the scalar/u grid, *_v on the v grid), and provides halo
!> exchange / gather / broadcast primitives on fields stored as (ij, ll).
MODULE parallel_lmdz
  USE mod_const_mpi
  USE lmdz_mpi, ONLY: using_mpi
  USE IOIPSL
  ! Maximum halo width (in latitude rows) supported by the *_u/*_v bounds below.
  INTEGER, PARAMETER :: halo_max = 3
  LOGICAL, SAVE :: using_omp ! .TRUE. if using OpenMP
  LOGICAL, SAVE :: is_master ! .TRUE. if the core is both MPI & OpenMP master
  !$OMP THREADPRIVATE(is_master)

  INTEGER, save :: mpi_size   ! number of MPI processes
  INTEGER, save :: mpi_rank   ! rank of this process
  INTEGER, save :: jj_begin   ! first latitude row owned by this rank
  INTEGER, save :: jj_end     ! last latitude row owned by this rank
  INTEGER, save :: jj_nb      ! number of latitude rows owned by this rank
  INTEGER, save :: ij_begin   ! first linear (i,j) index owned: (jj_begin-1)*iip1+1
  INTEGER, save :: ij_end     ! last linear (i,j) index owned: jj_end*iip1
  logical, save :: pole_nord  ! .TRUE. on rank 0 (owns the north pole row)
  logical, save :: pole_sud   ! .TRUE. on rank mpi_size-1 (owns the south pole row)

  ! Halo-extended latitude bounds on the u (scalar) grid, clipped to [1, jjp1].
  INTEGER, save :: jjb_u
  INTEGER, save :: jje_u
  INTEGER, save :: jjnb_u
  ! Halo-extended latitude bounds on the v grid, clipped to [1, jjm].
  INTEGER, save :: jjb_v
  INTEGER, save :: jje_v
  INTEGER, save :: jjnb_v
  ! Halo-extended linear-index bounds on the u grid, clipped to [1, ip1jmp1].
  INTEGER, save :: ijb_u
  INTEGER, save :: ije_u
  INTEGER, save :: ijnb_u
  ! Halo-extended linear-index bounds on the v grid, clipped to [1, ip1jm].
  INTEGER, save :: ijb_v
  INTEGER, save :: ije_v
  INTEGER, save :: ijnb_v

  ! Per-rank latitude distribution, indexed 0:mpi_size-1.
  INTEGER, ALLOCATABLE, SAVE, DIMENSION(:) :: jj_begin_para
  INTEGER, ALLOCATABLE, SAVE, DIMENSION(:) :: jj_end_para
  INTEGER, ALLOCATABLE, SAVE, DIMENSION(:) :: jj_nb_para
  INTEGER, save :: OMP_CHUNK  ! OpenMP chunk size for parallelized loops (read via getin)
  INTEGER, save :: omp_rank   ! OpenMP thread number of the current thread
  INTEGER, save :: omp_size   ! number of OpenMP threads
  !$OMP THREADPRIVATE(omp_rank)

  !> A complete latitude-band distribution: same quantities as the module-level
  !> variables above, bundled so alternative distributions can be swapped in
  !> (see create_distrib / Set_Distrib).
  TYPE distrib
    INTEGER :: jj_begin
    INTEGER :: jj_end
    INTEGER :: jj_nb
    INTEGER :: ij_begin
    INTEGER :: ij_end

    INTEGER :: jjb_u
    INTEGER :: jje_u
    INTEGER :: jjnb_u
    INTEGER :: jjb_v
    INTEGER :: jje_v
    INTEGER :: jjnb_v

    INTEGER :: ijb_u
    INTEGER :: ije_u
    INTEGER :: ijnb_u

    INTEGER :: ijb_v
    INTEGER :: ije_v
    INTEGER :: ijnb_v

    ! Pointers (not allocatables) so that distrib values can share/alias these
    ! arrays; allocated lazily in create_distrib.
    INTEGER, pointer :: jj_begin_para(:) => NULL()
    INTEGER, pointer :: jj_end_para(:) => NULL()
    INTEGER, pointer :: jj_nb_para(:) => NULL()
  END TYPE distrib

  ! Defined assignment for TYPE(distrib): deep-copies the para arrays' contents
  ! instead of copying the pointers themselves.
  INTERFACE ASSIGNMENT (=)
    MODULE PROCEDURE copy_distrib
  END INTERFACE

  ! Distribution currently installed in the module-level variables.
  TYPE(distrib), SAVE :: current_dist

CONTAINS

  !> Initialize MPI/OpenMP parallelism: query communicator size/rank, open a
  !> per-rank text output file, split the jjm+1 latitude rows into near-equal
  !> bands, set pole flags and halo-extended bounds, determine OpenMP
  !> thread count/rank and omp_chunk, and install the resulting distribution
  !> as current_dist.
  SUBROUTINE init_parallel
    USE lmdz_vampir
    USE lmdz_mpi
    USE lmdz_iniprint, ONLY: lunout, prt_level
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE
    INTEGER :: ierr
    INTEGER :: i, j
    ! NOTE(review): type_size, blocklen, type and comp_id are declared but not
    ! referenced anywhere in this routine.
    INTEGER :: type_size
    INTEGER, DIMENSION(3) :: blocklen, type
    INTEGER :: comp_id
    CHARACTER(LEN = 4) :: num
    CHARACTER(LEN = 20) :: filename

#ifdef CPP_OMP
    ! OpenMP runtime inquiry functions (declared EXTERNAL; no omp_lib here).
    INTEGER :: OMP_GET_NUM_THREADS
    EXTERNAL OMP_GET_NUM_THREADS
    INTEGER :: OMP_GET_THREAD_NUM
    EXTERNAL OMP_GET_THREAD_NUM
#endif

#ifdef CPP_OMP
    using_OMP=.TRUE.
#else
    using_OMP = .FALSE.
#endif

    CALL InitVampir

    IF (using_mpi) THEN
      CALL MPI_COMM_SIZE(COMM_LMDZ, mpi_size, ierr)
      CALL MPI_COMM_RANK(COMM_LMDZ, mpi_rank, ierr)
    ELSE
      mpi_size = 1
      mpi_rank = 0
    ENDIF

    ! Open text output file with mpi_rank in suffix of file name
    ! (only when lunout is not stdin/stdout; rank 0 keeps the original unit).
    IF (lunout /= 5 .AND. lunout /= 6) THEN
      WRITE(num, '(I4.4)') mpi_rank
      filename = 'lmdz.out_' // num
      IF (mpi_rank /= 0) THEN
        OPEN(UNIT = lunout, FILE = TRIM(filename), ACTION = 'write', &
                STATUS = 'unknown', FORM = 'formatted', IOSTAT = ierr)
      ENDIF
    ENDIF

    allocate(jj_begin_para(0:mpi_size - 1))
    allocate(jj_end_para(0:mpi_size - 1))
    allocate(jj_nb_para(0:mpi_size - 1))

    ! Distribute jjm+1 latitude rows: the first MOD(jjm+1, mpi_size) ranks get
    ! one extra row. Abort if any rank would get fewer than 2 rows.
    DO i = 0, mpi_size - 1
      jj_nb_para(i) = (jjm + 1) / mpi_size
      IF (i < MOD((jjm + 1), mpi_size)) jj_nb_para(i) = jj_nb_para(i) + 1

      IF (jj_nb_para(i) <= 1) THEN
        WRITE(lunout, *)"Arret : le nombre de bande de lattitude par process est trop faible (<2)."
        WRITE(lunout, *)" ---> diminuez le nombre de CPU ou augmentez la taille en lattitude"

        IF (using_mpi) CALL MPI_ABORT(COMM_LMDZ, -1, ierr)
      endif
    enddo

    ! jj_nb_para(0)=11
    ! jj_nb_para(1)=25
    ! jj_nb_para(2)=25
    ! jj_nb_para(3)=12

    ! Turn the per-rank row counts into contiguous [begin, end] ranges.
    j = 1
    DO i = 0, mpi_size - 1
      jj_begin_para(i) = j
      jj_end_para(i) = j + jj_Nb_para(i) - 1
      j = j + jj_Nb_para(i)
    enddo

    jj_begin = jj_begin_para(mpi_rank)
    jj_end = jj_end_para(mpi_rank)
    jj_nb = jj_nb_para(mpi_rank)

    ! Linear indices of the owned block on the (iip1 points per row) grid.
    ij_begin = (jj_begin - 1) * iip1 + 1
    ij_end = jj_end * iip1

    IF (mpi_rank==0) THEN
      pole_nord = .TRUE.
    else
      pole_nord = .FALSE.
    endif

    IF (mpi_rank==mpi_size - 1) THEN
      pole_sud = .TRUE.
    else
      pole_sud = .FALSE.
    endif

    WRITE(lunout, *)"init_parallel: jj_begin", jj_begin
    WRITE(lunout, *)"init_parallel: jj_end", jj_end
    WRITE(lunout, *)"init_parallel: ij_begin", ij_begin
    WRITE(lunout, *)"init_parallel: ij_end", ij_end

    ! Halo-extended bounds, clipped to the global grid limits
    ! (u grid: jjp1 rows / ip1jmp1 points; v grid: jjm rows / ip1jm points).
    jjb_u = MAX(jj_begin - halo_max, 1)
    jje_u = MIN(jj_end + halo_max, jjp1)
    jjnb_u = jje_u - jjb_u + 1

    jjb_v = MAX(jj_begin - halo_max, 1)
    jje_v = MIN(jj_end + halo_max, jjm)
    jjnb_v = jje_v - jjb_v + 1

    ijb_u = MAX(ij_begin - halo_max * iip1, 1)
    ije_u = MIN(ij_end + halo_max * iip1, ip1jmp1)
    ijnb_u = ije_u - ijb_u + 1

    ijb_v = MAX(ij_begin - halo_max * iip1, 1)
    ije_v = MIN(ij_end + halo_max * iip1, ip1jm)
    ijnb_v = ije_v - ijb_v + 1

    !$OMP PARALLEL
#ifdef CPP_OMP
    !$OMP MASTER
    omp_size=OMP_GET_NUM_THREADS()
    !$OMP END MASTER
    !$OMP BARRIER
    omp_rank=OMP_GET_THREAD_NUM()

    !Config Key = omp_chunk
    !Config Desc = size of the OpenMP blocks
    !Config Def = 1
    !Config Help = defines the size of the OpenMP iteration packets
    !Config distributed to each task when entering a
    !Config parallelized loop
    !$OMP MASTER
    ! Default chunk: ceil((llm+1)/omp_size); may be overridden by the
    ! 'omp_chunk' key in the run configuration (IOIPSL getin).
    omp_chunk=(llm+1)/omp_size
    IF (MOD(llm+1,omp_size)/=0) omp_chunk=omp_chunk+1
    CALL getin('omp_chunk',omp_chunk)
    !$OMP END MASTER
    !$OMP BARRIER
#else
    omp_size = 1
    omp_rank = 0
#endif
    !$OMP END PARALLEL

    CALL create_distrib(jj_nb_para, current_dist)

    IF ((mpi_rank==0).AND.(omp_rank==0)) THEN
      is_master = .TRUE.
    ELSE
      is_master = .FALSE.
    ENDIF

  END SUBROUTINE init_parallel

  !> Build a full TYPE(distrib) from per-rank row counts jj_nb_new:
  !> derive begin/end ranges, this rank's bounds, and the halo-extended
  !> u/v-grid bounds (same formulas as init_parallel). The d%*_para arrays
  !> are allocated on first use.
  SUBROUTINE create_distrib(jj_nb_new, d)
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE
    INTEGER, INTENT(IN) :: jj_Nb_New(0:MPI_Size - 1)
    TYPE(distrib), INTENT(INOUT) :: d
    INTEGER :: i

    ! Lazy allocation of the pointer components.
    IF (.NOT. ASSOCIATED(d%jj_nb_para)) ALLOCATE(d%jj_nb_para(0:MPI_Size - 1))
    IF (.NOT. ASSOCIATED(d%jj_begin_para)) ALLOCATE(d%jj_begin_para(0:MPI_Size - 1))
    IF (.NOT. ASSOCIATED(d%jj_end_para)) ALLOCATE(d%jj_end_para(0:MPI_Size - 1))

    d%jj_Nb_Para = jj_Nb_New

    ! Contiguous [begin, end] ranges from the row counts.
    d%jj_begin_para(0) = 1
    d%jj_end_para(0) = d%jj_Nb_Para(0)

    DO i = 1, mpi_size - 1
      d%jj_begin_para(i) = d%jj_end_para(i - 1) + 1
      d%jj_end_para(i) = d%jj_begin_para(i) + d%jj_Nb_para(i) - 1
    enddo

    d%jj_begin = d%jj_begin_para(mpi_rank)
    d%jj_end = d%jj_end_para(mpi_rank)
    d%jj_nb = d%jj_nb_para(mpi_rank)

    d%ij_begin = (d%jj_begin - 1) * iip1 + 1
    d%ij_end = d%jj_end * iip1

    ! Halo-extended bounds, clipped to the global grid limits.
    d%jjb_u = MAX(d%jj_begin - halo_max, 1)
    d%jje_u = MIN(d%jj_end + halo_max, jjp1)
    d%jjnb_u = d%jje_u - d%jjb_u + 1

    d%jjb_v = MAX(d%jj_begin - halo_max, 1)
    d%jje_v = MIN(d%jj_end + halo_max, jjm)
    d%jjnb_v = d%jje_v - d%jjb_v + 1

    d%ijb_u = MAX(d%ij_begin - halo_max * iip1, 1)
    d%ije_u = MIN(d%ij_end + halo_max * iip1, ip1jmp1)
    d%ijnb_u = d%ije_u - d%ijb_u + 1

    d%ijb_v = MAX(d%ij_begin - halo_max * iip1, 1)
    d%ije_v = MIN(d%ij_end + halo_max * iip1, ip1jm)
    d%ijnb_v = d%ije_v - d%ijb_v + 1

  END SUBROUTINE create_distrib

  !> Install distribution d into the module-level variables (and into
  !> current_dist via the defined assignment).
  SUBROUTINE Set_Distrib(d)
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE

    TYPE(distrib), INTENT(IN) :: d

    jj_begin = d%jj_begin
    jj_end = d%jj_end
    jj_nb = d%jj_nb
    ij_begin = d%ij_begin
    ij_end = d%ij_end

    jjb_u = d%jjb_u
    jje_u = d%jje_u
    jjnb_u = d%jjnb_u
    jjb_v = d%jjb_v
    jje_v = d%jje_v
    jjnb_v = d%jjnb_v

    ijb_u = d%ijb_u
    ije_u = d%ije_u
    ijnb_u = d%ijnb_u

    ijb_v = d%ijb_v
    ije_v = d%ije_v
    ijnb_v = d%ijnb_v

    jj_begin_para(:) = d%jj_begin_para(:)
    jj_end_para(:) = d%jj_end_para(:)
    jj_nb_para(:) = d%jj_nb_para(:)

    current_dist = d

  END SUBROUTINE Set_Distrib

  !> Defined assignment for TYPE(distrib): field-by-field copy, with the
  !> *_para arrays copied element-wise (the pointers in dist must already be
  !> associated with arrays of matching size — not checked here).
  SUBROUTINE copy_distrib(dist, new_dist)
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE

    TYPE(distrib), INTENT(INOUT) :: dist
    TYPE(distrib), INTENT(IN) :: new_dist

    dist%jj_begin = new_dist%jj_begin
    dist%jj_end = new_dist%jj_end
    dist%jj_nb = new_dist%jj_nb
    dist%ij_begin = new_dist%ij_begin
    dist%ij_end = new_dist%ij_end

    dist%jjb_u = new_dist%jjb_u
    dist%jje_u = new_dist%jje_u
    dist%jjnb_u = new_dist%jjnb_u
    dist%jjb_v = new_dist%jjb_v
    dist%jje_v = new_dist%jje_v
    dist%jjnb_v = new_dist%jjnb_v

    dist%ijb_u = new_dist%ijb_u
    dist%ije_u = new_dist%ije_u
    dist%ijnb_u = new_dist%ijnb_u

    dist%ijb_v = new_dist%ijb_v
    dist%ije_v = new_dist%ije_v
    dist%ijnb_v = new_dist%ijnb_v

    dist%jj_begin_para(:) = new_dist%jj_begin_para(:)
    dist%jj_end_para(:) = new_dist%jj_end_para(:)
    dist%jj_nb_para(:) = new_dist%jj_nb_para(:)

  END SUBROUTINE copy_distrib

  !> Return (a deep copy of) the currently installed distribution.
  SUBROUTINE get_current_distrib(d)
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE

    TYPE(distrib), INTENT(OUT) :: d

    d = current_dist

  END SUBROUTINE get_current_distrib

  !> Release the distribution arrays and shut down the parallel environment:
  !> in coupled-ocean mode, terminate XIOS and the OASIS/PRISM coupler
  !> (which is then responsible for MPI finalization); otherwise close XIOS
  !> and call MPI_FINALIZE directly.
  SUBROUTINE Finalize_parallel
    USE lmdz_mpi
    USE lmdz_wxios  ! ug For the XIOS outputs
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    USE control_mod, only : ok_dyn_xios
#ifdef CPP_COUPLE
    ! Use of Oasis-MCT coupler
#if defined CPP_OMCT
    USE mod_prism
#else
    USE mod_prism_proto
#endif
    ! Ehouarn: surface_data module is in 'phylmd' ...
    USE surface_data, ONLY: type_ocean
    IMPLICIT NONE
#else
    IMPLICIT NONE
    ! without the surface_data module, we declare (and set) a dummy 'type_ocean'
    CHARACTER(LEN = 6), parameter :: type_ocean = "dummy"
#endif

    INTEGER :: ierr
    INTEGER :: i

    IF (allocated(jj_begin_para)) deallocate(jj_begin_para)
    IF (allocated(jj_end_para)) deallocate(jj_end_para)
    IF (allocated(jj_nb_para)) deallocate(jj_nb_para)

    IF (type_ocean == 'couple') THEN
#ifdef CPP_COUPLE
      IF (using_xios) THEN
        ! Clean shutdown of XIOS
        ! close xios dynamic context if is defined (call LMDZDYN)
        IF (ok_dyn_xios) THEN
          CALL xios_context_finalize()
        ENDIF
        CALL wxios_close()
        CALL prism_terminate_proto(ierr)
        IF (ierr .NE. PRISM_Ok) THEN
          CALL abort_gcm('Finalize_parallel',' Probleme dans prism_terminate_proto ',1)
        ENDIF
      ELSE
        CALL prism_terminate_proto(ierr)
        IF (ierr .NE. PRISM_Ok) THEN
          CALL abort_gcm('Finalize_parallel',' Probleme dans prism_terminate_proto ',1)
        endif
      ENDIF
#else
      CALL abort_gcm('Finalize_parallel', 'type_ocean = couple but CPP_COUPLE not defined', 1)
#endif
    ELSE
      IF (using_xios) THEN
        ! Clean shutdown of XIOS
        IF (ok_dyn_xios) THEN
          CALL xios_context_finalize()
        END IF
        CALL wxios_close()
      END IF
      ! NOTE(review): MPI_FINALIZE is only reached in the non-coupled branch;
      ! in coupled mode finalization is presumably left to the coupler — confirm.
      IF (using_mpi) CALL MPI_FINALIZE(ierr)
    END IF

  END SUBROUTINE Finalize_parallel

  !> Copy the first 'row' latitude rows (row*iip1 points) of each of the ll
  !> levels of Field into the contiguous 1-D Buffer, level by level.
  SUBROUTINE Pack_Data(Field, ij, ll, row, Buffer)
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE

    INTEGER, INTENT(IN) :: ij, ll, row
    REAL, DIMENSION(ij, ll), INTENT(IN) :: Field
    REAL, DIMENSION(ll * iip1 * row), INTENT(OUT) :: Buffer

    INTEGER :: Pos
    INTEGER :: i, l

    Pos = 0
    DO l = 1, ll
      DO i = 1, row * iip1
        Pos = Pos + 1
        Buffer(Pos) = Field(i, l)
      enddo
    enddo

  END SUBROUTINE Pack_data

  !> Inverse of Pack_Data: scatter the contiguous Buffer back into the first
  !> row*iip1 points of each of the ll levels of Field.
  SUBROUTINE Unpack_Data(Field, ij, ll, row, Buffer)
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE

    INTEGER, INTENT(IN) :: ij, ll, row
    REAL, DIMENSION(ij, ll), INTENT(OUT) :: Field
    REAL, DIMENSION(ll * iip1 * row), INTENT(IN) :: Buffer

    INTEGER :: Pos
    INTEGER :: i, l

    Pos = 0

    DO l = 1, ll
      DO i = 1, row * iip1
        Pos = Pos + 1
        Field(i, l) = Buffer(Pos)
      enddo
    enddo

  END SUBROUTINE UnPack_data

  !> MPI barrier on COMM_LMDZ (no-op without MPI); serialized against other
  !> MPI calls via the (MPI) OpenMP critical section.
  SUBROUTINE barrier
    USE lmdz_mpi
    IMPLICIT NONE
    INTEGER :: ierr

    !$OMP CRITICAL (MPI)
    IF (using_mpi) CALL MPI_Barrier(COMM_LMDZ, ierr)
    !$OMP END CRITICAL (MPI)

  END SUBROUTINE barrier

  !> Exchange latitude halos of Field(ij, ll) with the neighbouring ranks:
  !> receive 'up' rows from rank-1 into Field(ij_begin - up*iip1 ...) and
  !> 'down' rows from rank+1 into Field(ij_end + 1 ...). Symmetrically, this
  !> rank sends its top 'down' rows upward and its bottom 'up' rows downward
  !> — NOTE(review): sends match the neighbours' receives only if all ranks
  !> call with the same up/down values; confirm against callers.
  !> Pole ranks and zero-width halos skip the corresponding transfers.
  SUBROUTINE exchange_hallo(Field, ij, ll, up, down)
    USE lmdz_mpi
    USE lmdz_vampir
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE
    INTEGER :: ij, ll
    REAL, DIMENSION(ij, ll) :: Field
    INTEGER :: up, down

    INTEGER :: ierr
    LOGICAL :: SendUp, SendDown
    LOGICAL :: RecvUp, RecvDown
    INTEGER, DIMENSION(4) :: Request
    INTEGER, DIMENSION(MPI_STATUS_SIZE, 4) :: Status
    INTEGER :: NbRequest
    REAL, DIMENSION(:), ALLOCATABLE :: Buffer_Send_up, Buffer_Send_down
    REAL, DIMENSION(:), ALLOCATABLE :: Buffer_Recv_up, Buffer_Recv_down
    INTEGER :: Buffer_size

    IF (using_mpi) THEN
      CALL barrier
      CALL VTb(VThallo)   ! Vampir trace: enter halo-exchange region

      SendUp = .TRUE.
      SendDown = .TRUE.
      RecvUp = .TRUE.
      RecvDown = .TRUE.

      ! No neighbour beyond the poles.
      IF (pole_nord) THEN
        SendUp = .FALSE.
        RecvUp = .FALSE.
      ENDIF

      IF (pole_sud) THEN
        SendDown = .FALSE.
        RecvDown = .FALSE.
      ENDIF

      ! Zero-width halos: up=0 disables the upward receive (and the downward
      ! send of 'up' rows); down=0 disables the downward receive (and the
      ! upward send of 'down' rows).
      IF (up==0) THEN
        SendDown = .FALSE.
        RecvUp = .FALSE.
      endif

      IF (down==0) THEN
        SendUp = .FALSE.
        RecvDown = .FALSE.
      endif

      NbRequest = 0

      IF (SendUp) THEN
        NbRequest = NbRequest + 1
        ! Top 'down' rows of the local block -> lower halo of rank-1.
        buffer_size = down * iip1 * ll
        allocate(Buffer_Send_up(Buffer_size))
        CALL PACK_Data(Field(ij_begin, 1), ij, ll, down, Buffer_Send_up)
        !$OMP CRITICAL (MPI)
        CALL MPI_ISEND(Buffer_send_up, Buffer_Size, MPI_REAL8, MPI_Rank - 1, 1, &
                COMM_LMDZ, Request(NbRequest), ierr)
        !$OMP END CRITICAL (MPI)
      ENDIF

      IF (SendDown) THEN
        NbRequest = NbRequest + 1
        ! Bottom 'up' rows of the local block -> upper halo of rank+1.
        buffer_size = up * iip1 * ll
        allocate(Buffer_Send_down(Buffer_size))
        CALL PACK_Data(Field(ij_end + 1 - up * iip1, 1), ij, ll, up, Buffer_send_down)
        !$OMP CRITICAL (MPI)
        CALL MPI_ISEND(Buffer_send_down, Buffer_Size, MPI_REAL8, MPI_Rank + 1, 1, &
                COMM_LMDZ, Request(NbRequest), ierr)
        !$OMP END CRITICAL (MPI)
      ENDIF

      IF (RecvUp) THEN
        NbRequest = NbRequest + 1
        buffer_size = up * iip1 * ll
        allocate(Buffer_recv_up(Buffer_size))
        !$OMP CRITICAL (MPI)
        CALL MPI_IRECV(Buffer_recv_up, Buffer_size, MPI_REAL8, MPI_Rank - 1, 1, &
                COMM_LMDZ, Request(NbRequest), ierr)
        !$OMP END CRITICAL (MPI)
      ENDIF

      IF (RecvDown) THEN
        NbRequest = NbRequest + 1
        buffer_size = down * iip1 * ll
        allocate(Buffer_recv_down(Buffer_size))
        !$OMP CRITICAL (MPI)
        CALL MPI_IRECV(Buffer_recv_down, Buffer_size, MPI_REAL8, MPI_Rank + 1, 1, &
                COMM_LMDZ, Request(NbRequest), ierr)
        !$OMP END CRITICAL (MPI)
      ENDIF

      ! Complete all pending transfers before unpacking the halos.
      IF (NbRequest > 0) CALL MPI_WAITALL(NbRequest, Request, Status, ierr)
      IF (RecvUp) CALL Unpack_Data(Field(ij_begin - up * iip1, 1), ij, ll, up, Buffer_Recv_up)
      IF (RecvDown) CALL Unpack_Data(Field(ij_end + 1, 1), ij, ll, down, Buffer_Recv_down)

      CALL VTe(VThallo)
      CALL barrier

    ENDIF  ! using_mpi

  END SUBROUTINE exchange_Hallo

  !> Gather the distributed Field(ij, ll) onto MPI process 'rank' using
  !> MPI_GATHERV. ij must be either ip1jmp1 (u/scalar grid) or ip1jm (v grid);
  !> anything else aborts. On the v grid the last rank's contribution is
  !> clipped to jjm rows (min(jj_end, jjm)).
  SUBROUTINE Gather_Field(Field, ij, ll, rank)
    USE lmdz_mpi
    USE lmdz_iniprint, ONLY: lunout, prt_level
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE
    INTEGER :: ij, ll, rank
    REAL, DIMENSION(ij, ll) :: Field
    REAL, DIMENSION(:), ALLOCATABLE :: Buffer_send
    REAL, DIMENSION(:), ALLOCATABLE :: Buffer_Recv
    INTEGER, DIMENSION(0:MPI_Size - 1) :: Recv_count, displ
    INTEGER :: ierr
    INTEGER :: i

    IF (using_mpi) THEN
      ! Pack this rank's rows into a contiguous send buffer.
      IF (ij==ip1jmp1) THEN
        allocate(Buffer_send(iip1 * ll * (jj_end - jj_begin + 1)))
        CALL Pack_Data(Field(ij_begin, 1), ij, ll, jj_end - jj_begin + 1, Buffer_send)
      ELSE IF (ij==ip1jm) THEN
        allocate(Buffer_send(iip1 * ll * (min(jj_end, jjm) - jj_begin + 1)))
        CALL Pack_Data(Field(ij_begin, 1), ij, ll, min(jj_end, jjm) - jj_begin + 1, Buffer_send)
      else
        WRITE(lunout, *)ij
        CALL abort_gcm("parallel_lmdz", "erreur dans Gather_Field", 1)
      endif

      IF (MPI_Rank==rank) THEN
        ! Root: compute per-rank counts and displacements for MPI_GATHERV.
        allocate(Buffer_Recv(ij * ll))

        !CDIR NOVECTOR
        DO i = 0, MPI_Size - 1

          IF (ij==ip1jmp1) THEN
            Recv_count(i) = (jj_end_para(i) - jj_begin_para(i) + 1) * ll * iip1
          ELSE IF (ij==ip1jm) THEN
            Recv_count(i) = (min(jj_end_para(i), jjm) - jj_begin_para(i) + 1) * ll * iip1
          else
            CALL abort_gcm("parallel_lmdz", "erreur dans Gather_Field", 1)
          endif

          IF (i==0) THEN
            displ(i) = 0
          else
            displ(i) = displ(i - 1) + Recv_count(i - 1)
          endif

        enddo

      else
        ! Ehouarn: When in debug mode, ifort complains (for CALL MPI_GATHERV
        ! below) about Buffer_Recv() being not allocated.
        ! So make a dummy allocation.
        allocate(Buffer_Recv(1))
      endif  ! of if (MPI_Rank==rank)

      !$OMP CRITICAL (MPI)
      ! Send count min(ij_end, ij) clips the v-grid case on the last rank.
      CALL MPI_GATHERV(Buffer_send, (min(ij_end, ij) - ij_begin + 1) * ll, MPI_REAL8, &
              Buffer_Recv, Recv_count, displ, MPI_REAL8, rank, COMM_LMDZ, ierr)
      !$OMP END CRITICAL (MPI)

      IF (MPI_Rank==rank) THEN
        ! Root: unpack each rank's contribution into its rows of Field.
        IF (ij==ip1jmp1) THEN
          DO i = 0, MPI_Size - 1
            CALL Unpack_Data(Field((jj_begin_para(i) - 1) * iip1 + 1, 1), ij, ll, &
                    jj_end_para(i) - jj_begin_para(i) + 1, Buffer_Recv(displ(i) + 1))
          enddo
        ELSE IF (ij==ip1jm) THEN
          DO i = 0, MPI_Size - 1
            CALL Unpack_Data(Field((jj_begin_para(i) - 1) * iip1 + 1, 1), ij, ll, &
                    min(jj_end_para(i), jjm) - jj_begin_para(i) + 1, Buffer_Recv(displ(i) + 1))
          enddo
        endif
      endif

    ENDIF ! using_mpi

  END SUBROUTINE Gather_Field

  !> Make Field(ij, ll) globally consistent on all ranks: gather onto rank 0,
  !> then broadcast the full field from rank 0.
  SUBROUTINE AllGather_Field(Field, ij, ll)
    USE lmdz_mpi
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE
    INTEGER :: ij, ll
    REAL, DIMENSION(ij, ll) :: Field
    INTEGER :: ierr

    IF (using_mpi) THEN
      CALL Gather_Field(Field, ij, ll, 0)
      !$OMP CRITICAL (MPI)
      CALL MPI_BCAST(Field, ij * ll, MPI_REAL8, 0, COMM_LMDZ, ierr)
      !$OMP END CRITICAL (MPI)
    ENDIF

  END SUBROUTINE AllGather_Field

  !> Broadcast Field(ij, ll) from MPI process 'rank' to all other ranks
  !> (no-op without MPI).
  SUBROUTINE Broadcast_Field(Field, ij, ll, rank)
    USE lmdz_mpi
    USE lmdz_dimensions, ONLY: iim, jjm, llm, ndm
    USE lmdz_paramet
    IMPLICIT NONE
    INTEGER :: ij, ll
    REAL, DIMENSION(ij, ll) :: Field
    INTEGER :: rank
    INTEGER :: ierr

    IF (using_mpi) THEN
      !$OMP CRITICAL (MPI)
      CALL MPI_BCAST(Field, ij * ll, MPI_REAL8, rank, COMM_LMDZ, ierr)
      !$OMP END CRITICAL (MPI)
    ENDIF

  END SUBROUTINE Broadcast_Field

END MODULE parallel_lmdz