MODULE timeloop_gcm_mod
  USE transfert_mod
  USE icosa
  PRIVATE
  
  PUBLIC :: init_timeloop, timeloop

  INTEGER, PARAMETER :: euler=1, rk4=2, mlf=3
  INTEGER, PARAMETER :: itau_sync=10

  TYPE(t_message),SAVE :: req_ps0, req_mass0, req_theta_rhodz0, req_u0, req_q0

  TYPE(t_field),POINTER,SAVE :: f_q(:)
  TYPE(t_field),POINTER,SAVE :: f_rhodz(:), f_mass(:), f_massm1(:), f_massm2(:), f_dmass(:)
  TYPE(t_field),POINTER,SAVE :: f_phis(:), f_ps(:),f_psm1(:), f_psm2(:), f_dps(:)
  TYPE(t_field),POINTER,SAVE :: f_u(:),f_um1(:),f_um2(:), f_du(:)
  TYPE(t_field),POINTER,SAVE :: f_theta_rhodz(:),f_theta_rhodzm1(:),f_theta_rhodzm2(:), f_dtheta_rhodz(:)
  TYPE(t_field),POINTER,SAVE :: f_hflux(:), f_wflux(:), f_hfluxt(:), f_wfluxt(:)  

  INTEGER,SAVE :: nb_stage, matsuno_period, scheme    
!$OMP THREADPRIVATE(nb_stage, matsuno_period, scheme)

CONTAINS
  
  SUBROUTINE init_timeloop
  USE icosa
  USE dissip_gcm_mod
  USE caldyn_mod
  USE etat0_mod
  USE disvert_mod
  USE guided_mod
  USE advect_tracer_mod
  USE physics_mod
  USE mpipara
  USE omp_para
  USE trace
  USE transfert_mod
  USE check_conserve_mod
  USE output_field_mod
  USE write_field_mod
  USE theta2theta_rhodz_mod
  USE sponge_mod
  IMPLICIT NONE

    CHARACTER(len=255) :: def


   IF (xios_output) itau_out=1
   IF (.NOT. enable_io) itau_out=HUGE(itau_out)

! Time-independant orography
    CALL allocate_field(f_phis,field_t,type_real,name='phis')
! Trends
    CALL allocate_field(f_du,field_u,type_real,llm,name='du')
    CALL allocate_field(f_dtheta_rhodz,field_t,type_real,llm,name='dtheta_rhodz')
! Model state at current time step (RK/MLF/Euler)
    CALL allocate_field(f_ps,field_t,type_real, name='ps')
    CALL allocate_field(f_mass,field_t,type_real,llm,name='mass')
    CALL allocate_field(f_u,field_u,type_real,llm,name='u')
    CALL allocate_field(f_theta_rhodz,field_t,type_real,llm,name='theta_rhodz')
! Model state at previous time step (RK/MLF)
    CALL allocate_field(f_um1,field_u,type_real,llm,name='um1')
    CALL allocate_field(f_theta_rhodzm1,field_t,type_real,llm,name='theta_rhodzm1')
! Tracers
    CALL allocate_field(f_q,field_t,type_real,llm,nqtot,'q')
    CALL allocate_field(f_rhodz,field_t,type_real,llm,name='rhodz')
! Mass fluxes
    CALL allocate_field(f_hflux,field_u,type_real,llm)    ! instantaneous mass fluxes
    CALL allocate_field(f_hfluxt,field_u,type_real,llm)   ! mass "fluxes" accumulated in time
    CALL allocate_field(f_wflux,field_t,type_real,llm+1)  ! vertical mass fluxes
    CALL allocate_field(f_dmass,field_t,type_real,llm, name='dmass')

    IF(caldyn_eta == eta_mass) THEN ! eta = mass coordinate (default)
       CALL allocate_field(f_dps,field_t,type_real,name='dps')
       CALL allocate_field(f_psm1,field_t,type_real,name='psm1')
       CALL allocate_field(f_wfluxt,field_t,type_real,llm+1,name='wfluxt')
       ! the following are unused but must point to something
!       f_massm1 => f_mass
    ELSE ! eta = Lagrangian vertical coordinate
       CALL allocate_field(f_massm1,field_t,type_real,llm, name='massm1')
       ! the following are unused but must point to something
       f_wfluxt => f_wflux
       f_dps  => f_phis
       f_psm1 => f_phis
    END IF

    def='runge_kutta'
    CALL getin('scheme',def)

    SELECT CASE (TRIM(def))
      CASE('euler')
        scheme=euler
        nb_stage=1
      CASE ('runge_kutta')
        scheme=rk4
        nb_stage=4
      CASE ('leapfrog_matsuno')
        scheme=mlf
        matsuno_period=5
        CALL getin('matsuno_period',matsuno_period)
        nb_stage=matsuno_period+1
     ! Model state 2 time steps ago (MLF)
        CALL allocate_field(f_theta_rhodzm2,field_t,type_real,llm)
        CALL allocate_field(f_um2,field_u,type_real,llm)
        IF(caldyn_eta == eta_mass) THEN ! eta = mass coordinate (default)
           CALL allocate_field(f_psm2,field_t,type_real)
           ! the following are unused but must point to something
           f_massm2 => f_mass
        ELSE ! eta = Lagrangian vertical coordinate
           CALL allocate_field(f_massm2,field_t,type_real,llm)
           ! the following are unused but must point to something
           f_psm2 => f_phis
        END IF
      CASE ('none')
        nb_stage=0

    CASE default
       PRINT*,'Bad selector for variable scheme : <', TRIM(def),             &
            ' > options are <euler>, <runge_kutta>, <leapfrog_matsuno>'
       STOP
    END SELECT

    CALL init_theta2theta_rhodz
    CALL init_dissip
    CALL init_sponge
    CALL init_caldyn
    CALL init_guided
    CALL init_advect_tracer
    CALL init_check_conserve

    CALL etat0(f_ps,f_mass,f_phis,f_theta_rhodz,f_u, f_q)

    CALL transfert_request(f_phis,req_i0) 
    CALL transfert_request(f_phis,req_i1) 
    CALL writefield("phis",f_phis,once=.TRUE.)

    CALL init_message(f_ps,req_i0,req_ps0)
    CALL init_message(f_mass,req_i0,req_mass0)
    CALL init_message(f_theta_rhodz,req_i0,req_theta_rhodz0)
    CALL init_message(f_u,req_e0_vect,req_u0)
    CALL init_message(f_q,req_i0,req_q0)

  END SUBROUTINE init_timeloop
  
  SUBROUTINE timeloop
  USE icosa
  USE dissip_gcm_mod
  USE sponge_mod
  USE disvert_mod
  USE caldyn_mod
  USE caldyn_gcm_mod, ONLY : req_ps, req_mass
  USE etat0_mod
  USE guided_mod
  USE advect_tracer_mod
  USE physics_mod
  USE mpipara
  USE omp_para
  USE trace
  USE transfert_mod
  USE check_conserve_mod
  USE xios_mod
  USE output_field_mod
  USE write_etat0_mod
  USE checksum_mod
  IMPLICIT NONE  
    REAL(rstd),POINTER :: q(:,:,:)
    REAL(rstd),POINTER :: phis(:), ps(:) ,psm1(:), psm2(:), dps(:)
    REAL(rstd),POINTER :: u(:,:) , um1(:,:), um2(:,:), du(:,:)
    REAL(rstd),POINTER :: rhodz(:,:), mass(:,:), massm1(:,:), massm2(:,:), dmass(:,:)
    REAL(rstd),POINTER :: theta_rhodz(:,:) , theta_rhodzm1(:,:), theta_rhodzm2(:,:), dtheta_rhodz(:,:)
    REAL(rstd),POINTER :: hflux(:,:),wflux(:,:),hfluxt(:,:),wfluxt(:,:)

    INTEGER :: ind
    INTEGER :: it,i,j,n,  stage
    LOGICAL :: fluxt_zero(ndomain) ! set to .TRUE. to start accumulating fluxes in time
    LOGICAL, PARAMETER :: check=.FALSE.
    INTEGER :: start_clock
    INTEGER :: stop_clock
    INTEGER :: rate_clock
    INTEGER :: l
    LOGICAL,SAVE :: first_physic=.TRUE.
!$OMP THREADPRIVATE(first_physic)    
    
   
    CALL switch_omp_distrib_level
    CALL caldyn_BC(f_phis, f_wflux) ! set constant values in first/last interfaces
    
!$OMP BARRIER
  DO ind=1,ndomain
     IF (.NOT. assigned_domain(ind)) CYCLE
     CALL swap_dimensions(ind)
     CALL swap_geometry(ind)
     rhodz=f_rhodz(ind); mass=f_mass(ind); ps=f_ps(ind)
     IF(caldyn_eta==eta_mass) THEN
        CALL compute_rhodz(.TRUE., ps, rhodz) ! save rhodz for transport scheme before dynamics update ps
     ELSE
       DO l=ll_begin,ll_end
         rhodz(:,l)=mass(:,l)
       ENDDO
     END IF
  END DO
!$OMP BARRIER
  fluxt_zero=.TRUE.

!$OMP MASTER
  CALL SYSTEM_CLOCK(start_clock)
!$OMP END MASTER   

  CALL check_conserve(f_ps,f_dps,f_u,f_theta_rhodz,f_phis,itau0)  

  CALL trace_on
  
  DO it=itau0+1,itau0+itaumax
     
    CALL check_conserve_detailed('detailed_budget 0', &
         f_ps,f_dps,f_u,f_theta_rhodz,f_phis,it)

    IF (xios_output) CALL xios_update_calendar(it)
    IF (it==itau0+1 .OR. MOD(it,itau_sync)==0) THEN
      CALL send_message(f_ps,req_ps0)
      CALL wait_message(req_ps0)
      CALL send_message(f_mass,req_mass0)
      CALL wait_message(req_mass0)
      CALL send_message(f_theta_rhodz,req_theta_rhodz0) 
      CALL wait_message(req_theta_rhodz0) 
      CALL send_message(f_u,req_u0)
      CALL wait_message(req_u0)
      CALL send_message(f_q,req_q0) 
      CALL wait_message(req_q0) 

    ENDIF

    IF (is_master) PRINT *,"It No :",It,"   t :",dt*It

    IF (mod(it,itau_out)==0 ) THEN
      CALL update_time_counter(dt*it)
      CALL output_field("q",f_q)
    ENDIF
    
    CALL guided(it*dt,f_ps,f_theta_rhodz,f_u,f_q)

    DO stage=1,nb_stage
!       CALL checksum(f_ps)
!       CALL checksum(f_theta_rhodz)
!       CALL checksum(f_mass)
       CALL caldyn((stage==1) .AND. (MOD(it,itau_out)==0), &
            f_phis,f_ps,f_mass,f_theta_rhodz,f_u, f_q, &
            f_hflux, f_wflux, f_dps, f_dmass, f_dtheta_rhodz, f_du)
!       CALL checksum(f_dps)
!       CALL checksum(f_dtheta_rhodz)
!       CALL checksum(f_dmass)
       SELECT CASE (scheme)
       CASE(euler)
          CALL euler_scheme(.TRUE.)
       CASE (rk4)
          CALL rk_scheme(stage)
       CASE (mlf)
          CALL  leapfrog_matsuno_scheme(stage)
       CASE DEFAULT
          STOP
       END SELECT
    END DO

    CALL check_conserve_detailed('detailed_budget 1', &
         f_ps,f_dps,f_u,f_theta_rhodz,f_phis,it)

    IF (MOD(it,itau_dissip)==0) THEN
       
       IF(caldyn_eta==eta_mass) THEN
!ym flush ps
!$OMP BARRIER
          DO ind=1,ndomain
             IF (.NOT. assigned_domain(ind)) CYCLE
             CALL swap_dimensions(ind)
             CALL swap_geometry(ind)
             mass=f_mass(ind); ps=f_ps(ind);
             CALL compute_rhodz(.TRUE., ps, mass)
          END DO
       ENDIF

       CALL dissip(f_u,f_du,f_mass,f_phis, f_theta_rhodz,f_dtheta_rhodz)

       CALL euler_scheme(.FALSE.)  ! update only u, theta
       IF (iflag_sponge > 0) THEN
         CALL sponge(f_u,f_du,f_theta_rhodz,f_dtheta_rhodz)
         CALL euler_scheme(.FALSE.)  ! update only u, theta
       ENDIF
    END IF

    CALL check_conserve_detailed('detailed_budget 2', &
         f_ps,f_dps,f_u,f_theta_rhodz,f_phis,it)

    IF(MOD(it,itau_adv)==0) THEN

       CALL advect_tracer(f_hfluxt,f_wfluxt,f_u, f_q,f_rhodz)  ! update q and rhodz after RK step
       fluxt_zero=.TRUE.

       ! FIXME : check that rhodz is consistent with ps
       IF (check) THEN
         DO ind=1,ndomain
            IF (.NOT. assigned_domain(ind)) CYCLE
            CALL swap_dimensions(ind)
            CALL swap_geometry(ind)
            rhodz=f_rhodz(ind); ps=f_ps(ind);
            CALL compute_rhodz(.FALSE., ps, rhodz)   
         END DO
       ENDIF

    END IF


    IF (MOD(it,itau_check_conserv)==0) THEN
      CALL check_conserve(f_ps,f_dps,f_u,f_theta_rhodz,f_phis,it) 
    ENDIF
     
    IF (MOD(it,itau_physics)==0) THEN
      CALL physics(it,f_phis, f_ps, f_theta_rhodz, f_u, f_wflux, f_q)

!$OMP MASTER
   IF (first_physic) CALL SYSTEM_CLOCK(start_clock)
!$OMP END MASTER   
      first_physic=.FALSE.
    ENDIF
    
  ENDDO

  CALL write_etat0(itau0+itaumax,f_ps, f_phis,f_theta_rhodz,f_u,f_q) 

  CALL check_conserve(f_ps,f_dps,f_u,f_theta_rhodz,f_phis,it)  

!$OMP MASTER
  CALL SYSTEM_CLOCK(stop_clock)
  CALL SYSTEM_CLOCK(count_rate=rate_clock)
    
  IF (mpi_rank==0) THEN 
    PRINT *,"Time elapsed : ",(stop_clock-start_clock)*1./rate_clock 
  ENDIF  
!$OMP END MASTER 
 
 CONTAINS

    SUBROUTINE Euler_scheme(with_dps)
    IMPLICIT NONE
    LOGICAL :: with_dps
    INTEGER :: ind
    INTEGER :: i,j,ij,l
    CALL trace_start("Euler_scheme")  

    DO ind=1,ndomain
       IF (.NOT. assigned_domain(ind)) CYCLE
       CALL swap_dimensions(ind)
       CALL swap_geometry(ind)

       IF(with_dps) THEN ! update ps/mass
          IF(caldyn_eta==eta_mass) THEN ! update ps
             ps=f_ps(ind) ; dps=f_dps(ind) ;              
             IF (is_omp_first_level) THEN
!$SIMD
                DO ij=ij_begin,ij_end
                    ps(ij)=ps(ij)+dt*dps(ij)
                ENDDO
             ENDIF 
          ELSE ! update mass
             mass=f_mass(ind) ; dmass=f_dmass(ind) ;              
             DO l=ll_begin,ll_end
!$SIMD
                DO ij=ij_begin,ij_end
                    mass(ij,l)=mass(ij,l)+dt*dmass(ij,l)
                ENDDO
             END DO
          END IF

          hflux=f_hflux(ind);     hfluxt=f_hfluxt(ind)
          wflux=f_wflux(ind);     wfluxt=f_wfluxt(ind)
          CALL accumulate_fluxes(hflux,wflux,hfluxt,wfluxt,dt,fluxt_zero(ind))
       END IF ! update ps/mass
       
       u=f_u(ind) ; theta_rhodz=f_theta_rhodz(ind)
       du=f_du(ind) ; dtheta_rhodz=f_dtheta_rhodz(ind)

       DO l=ll_begin,ll_end
!$SIMD
         DO ij=ij_begin,ij_end
             u(ij+u_right,l)=u(ij+u_right,l)+dt*du(ij+u_right,l)
             u(ij+u_lup,l)=u(ij+u_lup,l)+dt*du(ij+u_lup,l)
             u(ij+u_ldown,l)=u(ij+u_ldown,l)+dt*du(ij+u_ldown,l)
             theta_rhodz(ij,l)=theta_rhodz(ij,l)+dt*dtheta_rhodz(ij,l)
         ENDDO
       ENDDO
    ENDDO

    CALL trace_end("Euler_scheme")  

    END SUBROUTINE Euler_scheme

    SUBROUTINE RK_scheme(stage)
      IMPLICIT NONE
      INTEGER :: ind, stage
      REAL(rstd), DIMENSION(4), PARAMETER :: coef = (/ .25, 1./3., .5, 1. /)
      REAL(rstd) :: tau
      INTEGER :: i,j,ij,l
  
      CALL trace_start("RK_scheme")  

      tau = dt*coef(stage)

      ! if mass coordinate, deal with ps first on one core
      IF(caldyn_eta==eta_mass) THEN
         IF (is_omp_first_level) THEN

            DO ind=1,ndomain
               IF (.NOT. assigned_domain(ind)) CYCLE
               CALL swap_dimensions(ind)
               CALL swap_geometry(ind)
               ps=f_ps(ind) ; psm1=f_psm1(ind) ; dps=f_dps(ind) 
               
               IF (stage==1) THEN ! first stage : save model state in XXm1
!$SIMD
                 DO ij=ij_begin,ij_end
                   psm1(ij)=ps(ij)
                 ENDDO
               ENDIF
            
               ! updates are of the form x1 := x0 + tau*f(x1)
!$SIMD
               DO ij=ij_begin,ij_end
                   ps(ij)=psm1(ij)+tau*dps(ij)
               ENDDO
            ENDDO
         ENDIF
!         CALL send_message(f_ps,req_ps)
!ym no overlap for now         
!         CALL wait_message(req_ps)  
      
      ELSE ! Lagrangian coordinate, deal with mass
         DO ind=1,ndomain
            IF (.NOT. assigned_domain(ind)) CYCLE
            CALL swap_dimensions(ind)
            CALL swap_geometry(ind)
            mass=f_mass(ind); dmass=f_dmass(ind); massm1=f_massm1(ind)

            IF (stage==1) THEN ! first stage : save model state in XXm1
               DO l=ll_begin,ll_end
!$SIMD
                 DO ij=ij_begin,ij_end
                    massm1(ij,l)=mass(ij,l)
                 ENDDO
               ENDDO
            END IF

            ! updates are of the form x1 := x0 + tau*f(x1)
            DO l=ll_begin,ll_end
!$SIMD
               DO ij=ij_begin,ij_end
                   mass(ij,l)=massm1(ij,l)+tau*dmass(ij,l)
               ENDDO
            ENDDO
         END DO
!         CALL send_message(f_mass,req_mass)
!ym no overlap for now         
!         CALL wait_message(req_mass)  

      END IF

      ! now deal with other prognostic variables
      DO ind=1,ndomain
         IF (.NOT. assigned_domain(ind)) CYCLE
         CALL swap_dimensions(ind)
         CALL swap_geometry(ind)
         u=f_u(ind)      ; du=f_du(ind)      ; um1=f_um1(ind) 
         theta_rhodz=f_theta_rhodz(ind)
         theta_rhodzm1=f_theta_rhodzm1(ind)
         dtheta_rhodz=f_dtheta_rhodz(ind)
         
         IF (stage==1) THEN ! first stage : save model state in XXm1
           DO l=ll_begin,ll_end
!$SIMD
                DO ij=ij_begin,ij_end
                 um1(ij+u_right,l)=u(ij+u_right,l)
                 um1(ij+u_lup,l)=u(ij+u_lup,l)
                 um1(ij+u_ldown,l)=u(ij+u_ldown,l)
                 theta_rhodzm1(ij,l)=theta_rhodz(ij,l)
             ENDDO
           ENDDO
         END IF        

         DO l=ll_begin,ll_end
!$SIMD
           DO ij=ij_begin,ij_end
               u(ij+u_right,l)=um1(ij+u_right,l)+tau*du(ij+u_right,l)
               u(ij+u_lup,l)=um1(ij+u_lup,l)+tau*du(ij+u_lup,l)
               u(ij+u_ldown,l)=um1(ij+u_ldown,l)+tau*du(ij+u_ldown,l)
               theta_rhodz(ij,l)=theta_rhodzm1(ij,l)+tau*dtheta_rhodz(ij,l)
           ENDDO
         ENDDO

         IF(stage==nb_stage) THEN ! accumulate mass fluxes at last stage
            hflux=f_hflux(ind);     hfluxt=f_hfluxt(ind)
            wflux=f_wflux(ind);     wfluxt=f_wfluxt(ind)
            CALL accumulate_fluxes(hflux,wflux, hfluxt,wfluxt, dt,fluxt_zero(ind))
         END IF
      END DO
      
      CALL trace_end("RK_scheme")
      
    END SUBROUTINE RK_scheme

    SUBROUTINE leapfrog_matsuno_scheme(stage)
    IMPLICIT NONE
    INTEGER :: ind, stage
    REAL :: tau

      CALL trace_start("leapfrog_matsuno_scheme")
    
      tau = dt/nb_stage
      DO ind=1,ndomain
        IF (.NOT. assigned_domain(ind)) CYCLE
        CALL swap_dimensions(ind)
        CALL swap_geometry(ind)

        ps=f_ps(ind)   ; u=f_u(ind)   ; theta_rhodz=f_theta_rhodz(ind)
        psm1=f_psm1(ind) ; um1=f_um1(ind) ; theta_rhodzm1=f_theta_rhodzm1(ind)
        psm2=f_psm2(ind) ; um2=f_um2(ind) ; theta_rhodzm2=f_theta_rhodzm2(ind)
        dps=f_dps(ind) ; du=f_du(ind) ; dtheta_rhodz=f_dtheta_rhodz(ind)

        
        IF (stage==1) THEN
          psm1(:)=ps(:) ; um1(:,:)=u(:,:) ; theta_rhodzm1(:,:)=theta_rhodz(:,:)
          ps(:)=psm1(:)+tau*dps(:)
          u(:,:)=um1(:,:)+tau*du(:,:)
          theta_rhodz(:,:)=theta_rhodzm1(:,:)+tau*dtheta_rhodz(:,:)

        ELSE IF (stage==2) THEN

          ps(:)=psm1(:)+tau*dps(:)
          u(:,:)=um1(:,:)+tau*du(:,:)
          theta_rhodz(:,:)=theta_rhodzm1(:,:)+tau*dtheta_rhodz(:,:)

          psm2(:)=psm1(:) ; theta_rhodzm2(:,:)=theta_rhodzm1(:,:) ; um2(:,:)=um1(:,:) 
          psm1(:)=ps(:) ; theta_rhodzm1(:,:)=theta_rhodz(:,:) ; um1(:,:)=u(:,:) 

        ELSE 

          ps(:)=psm2(:)+2*tau*dps(:)
          u(:,:)=um2(:,:)+2*tau*du(:,:)
          theta_rhodz(:,:)=theta_rhodzm2(:,:)+2*tau*dtheta_rhodz(:,:)

          psm2(:)=psm1(:) ; theta_rhodzm2(:,:)=theta_rhodzm1(:,:) ; um2(:,:)=um1(:,:) 
          psm1(:)=ps(:) ; theta_rhodzm1(:,:)=theta_rhodz(:,:) ; um1(:,:)=u(:,:) 

        ENDIF
      
      ENDDO
      CALL trace_end("leapfrog_matsuno_scheme")
      
    END SUBROUTINE leapfrog_matsuno_scheme  
         
  END SUBROUTINE timeloop    

 SUBROUTINE accumulate_fluxes(hflux,wflux, hfluxt,wfluxt, tau,fluxt_zero)
    USE icosa
    USE omp_para
    USE disvert_mod
    IMPLICIT NONE
    REAL(rstd), INTENT(IN)    :: hflux(3*iim*jjm,llm), wflux(iim*jjm,llm+1)
    REAL(rstd), INTENT(INOUT) :: hfluxt(3*iim*jjm,llm), wfluxt(iim*jjm,llm+1)
    REAL(rstd), INTENT(IN) :: tau
    LOGICAL, INTENT(INOUT) :: fluxt_zero
    INTEGER :: l,i,j,ij

    IF(fluxt_zero) THEN

       fluxt_zero=.FALSE.

       DO l=ll_begin,ll_end
!$SIMD
         DO ij=ij_begin_ext,ij_end_ext
             hfluxt(ij+u_right,l) = tau*hflux(ij+u_right,l)
             hfluxt(ij+u_lup,l) = tau*hflux(ij+u_lup,l)
             hfluxt(ij+u_ldown,l) = tau*hflux(ij+u_ldown,l)
         ENDDO
       ENDDO

       IF(caldyn_eta==eta_mass) THEN ! no need for vertical fluxes if eta_lag
          DO l=ll_begin,ll_endp1
!$SIMD
             DO ij=ij_begin,ij_end
                wfluxt(ij,l) = tau*wflux(ij,l)
             ENDDO
          ENDDO
       END IF

    ELSE

       DO l=ll_begin,ll_end
!$SIMD
         DO ij=ij_begin_ext,ij_end_ext
             hfluxt(ij+u_right,l) = hfluxt(ij+u_right,l)+tau*hflux(ij+u_right,l)
             hfluxt(ij+u_lup,l) = hfluxt(ij+u_lup,l)+tau*hflux(ij+u_lup,l)
             hfluxt(ij+u_ldown,l) = hfluxt(ij+u_ldown,l)+tau*hflux(ij+u_ldown,l)
         ENDDO
       ENDDO

       IF(caldyn_eta==eta_mass) THEN ! no need for vertical fluxes if eta_lag
          DO l=ll_begin,ll_endp1
!$SIMD
             DO ij=ij_begin,ij_end
                   wfluxt(ij,l) = wfluxt(ij,l)+tau*wflux(ij,l)
             ENDDO
          ENDDO
       END IF

   END IF

  END SUBROUTINE accumulate_fluxes
  
END MODULE timeloop_gcm_mod
