Ignore:
Timestamp:
May 18, 2024, 8:07:34 PM (7 weeks ago)
Author:
idelkadi
Message:

The addition of explicit loops with the "omp simd reduction" directive to solve the slowness problem linked to the "sum" command (svn4938 revesion) led to non-reproducibility in MPI and mixed MPI-OMP modes in the case of Tripleclouds and Mcica solvers.
We return to the svn4848 versions of the radiation_tripleclouds_*w.F90 and radiation_mcica_*w.F90 routines.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • LMDZ6/trunk/libf/phylmd/ecrad/radiation/radiation_tripleclouds_sw.F90

    r4853 r4946  
    7474    ! Gas and aerosol optical depth, single-scattering albedo and
    7575    ! asymmetry factor at each shortwave g-point
    76     real(jprb), intent(in), dimension(config%n_g_sw,nlev,istartcol:iendcol) &
    77          &  :: od, ssa, g
     76!    real(jprb), intent(in), dimension(istartcol:iendcol,nlev,config%n_g_sw) :: &
     77    real(jprb), intent(in), dimension(config%n_g_sw,nlev,istartcol:iendcol) :: &
     78         &  od, ssa, g
    7879
    7980    ! Cloud and precipitation optical depth, single-scattering albedo and
    8081    ! asymmetry factor in each shortwave band
    81     real(jprb), intent(in), dimension(config%n_bands_sw,nlev,istartcol:iendcol) &
    82          &  :: od_cloud, ssa_cloud, g_cloud
     82    real(jprb), intent(in), dimension(config%n_bands_sw,nlev,istartcol:iendcol) :: &
     83         &  od_cloud, ssa_cloud, g_cloud
    8384
    8485    ! Optical depth, single scattering albedo and asymmetry factor in
     
    9192    ! flux into a plane perpendicular to the incoming radiation at
    9293    ! top-of-atmosphere in each of the shortwave g points
    93     real(jprb), intent(in), dimension(config%n_g_sw,istartcol:iendcol) &
    94          &  :: albedo_direct, albedo_diffuse, incoming_sw
     94    real(jprb), intent(in), dimension(config%n_g_sw,istartcol:iendcol) :: &
     95         &  albedo_direct, albedo_diffuse, incoming_sw
    9596
    9697    ! Output
     
    165166    real(jprb) :: scat_od, scat_od_cloud
    166167
    167     ! Temporaries to speed up summations
    168     real(jprb) :: sum_dn_diff, sum_dn_dir, sum_up
    169 
    170     ! Local cosine of solar zenith angle
    171168    real(jprb) :: mu0
    172169
     
    447444      end if
    448445     
    449       ! Store the TOA broadband fluxes, noting that there is no
    450       ! diffuse downwelling at TOA. The intrinsic "sum" command has
    451       ! been found to be very slow; better performance is found on
    452       ! x86-64 architecture with explicit loops and the "omp simd
    453       ! reduction" directive.
    454       sum_up     = 0.0_jprb
    455       sum_dn_dir = 0.0_jprb
    456       do jreg = 1,nregions
    457         !$omp simd reduction(+:sum_up, sum_dn_dir)
    458         do jg = 1,ng
    459           sum_up     = sum_up     + flux_up(jg,jreg)
    460           sum_dn_dir = sum_dn_dir + direct_dn(jg,jreg)
    461         end do
    462       end do
    463       flux%sw_up(jcol,1) = sum_up
    464       flux%sw_dn(jcol,1) = mu0 * sum_dn_dir
     446      ! Store the TOA broadband fluxes
     447      flux%sw_up(jcol,1) = sum(sum(flux_up,1))
     448      flux%sw_dn(jcol,1) = mu0 * sum(sum(direct_dn,1))
    465449      if (allocated(flux%sw_dn_direct)) then
    466450        flux%sw_dn_direct(jcol,1) = flux%sw_dn(jcol,1)
    467451      end if
    468452      if (config%do_clear) then
    469         sum_up     = 0.0_jprb
    470         sum_dn_dir = 0.0_jprb
    471         !$omp simd reduction(+:sum_up, sum_dn_dir)
    472         do jg = 1,ng
    473           sum_up     = sum_up     + flux_up_clear(jg)
    474           sum_dn_dir = sum_dn_dir + direct_dn_clear(jg)
    475         end do
    476         flux%sw_up_clear(jcol,1) = sum_up
    477         flux%sw_dn_clear(jcol,1) = mu0 * sum_dn_dir
     453        flux%sw_up_clear(jcol,1) = sum(flux_up_clear)
     454        flux%sw_dn_clear(jcol,1) = mu0 * sum(direct_dn_clear)
    478455        if (allocated(flux%sw_dn_direct_clear)) then
    479456          flux%sw_dn_direct_clear(jcol,1) = flux%sw_dn_clear(jcol,1)
     
    490467             &           config%i_spec_from_reordered_g_sw, &
    491468             &           flux%sw_dn_band(:,jcol,1))
    492         flux%sw_dn_band(:,jcol,1) = mu0 * flux%sw_dn_band(:,jcol,1)
     469        flux%sw_dn_band(:,jcol,1) = &
     470             &  mu0 * flux%sw_dn_band(:,jcol,1)
    493471        if (allocated(flux%sw_dn_direct_band)) then
    494472          flux%sw_dn_direct_band(:,jcol,1) = flux%sw_dn_band(:,jcol,1)
     
    571549               ! nothing to do
    572550
    573         ! Store the broadband fluxes. The intrinsic "sum" command has
    574         ! been found to be very slow; better performance is found on
    575         ! x86-64 architecture with explicit loops and the "omp simd
    576         ! reduction" directive.
    577         sum_up      = 0.0_jprb
    578         sum_dn_dir  = 0.0_jprb
    579         sum_dn_diff = 0.0_jprb
    580         do jreg = 1,nregions
    581           !$omp simd reduction(+:sum_up, sum_dn_diff, sum_dn_dir)
    582           do jg = 1,ng
    583             sum_up      = sum_up      + flux_up(jg,jreg)
    584             sum_dn_diff = sum_dn_diff + flux_dn(jg,jreg)
    585             sum_dn_dir  = sum_dn_dir  + direct_dn(jg,jreg)
    586           end do
    587         end do
    588         flux%sw_up(jcol,jlev+1) = sum_up
    589         flux%sw_dn(jcol,jlev+1) = mu0 * sum_dn_dir + sum_dn_diff
     551        ! Store the broadband fluxes
     552        flux%sw_up(jcol,jlev+1) = sum(sum(flux_up,1))
    590553        if (allocated(flux%sw_dn_direct)) then
    591           flux%sw_dn_direct(jcol,jlev+1) = mu0 * sum_dn_dir
     554          flux%sw_dn_direct(jcol,jlev+1) = mu0 * sum(sum(direct_dn,1))
     555          flux%sw_dn(jcol,jlev+1) &
     556               &  = flux%sw_dn_direct(jcol,jlev+1) + sum(sum(flux_dn,1))
     557        else
     558          flux%sw_dn(jcol,jlev+1) = mu0 * sum(sum(direct_dn,1)) + sum(sum(flux_dn,1))   
    592559        end if
    593560        if (config%do_clear) then
    594           sum_up      = 0.0_jprb
    595           sum_dn_dir  = 0.0_jprb
    596           sum_dn_diff = 0.0_jprb
    597           !$omp simd reduction(+:sum_up, sum_dn_diff, sum_dn_dir)
    598           do jg = 1,ng
    599             sum_up      = sum_up      + flux_up_clear(jg)
    600             sum_dn_diff = sum_dn_diff + flux_dn_clear(jg)
    601             sum_dn_dir  = sum_dn_dir  + direct_dn_clear(jg)
    602           end do
    603           flux%sw_up_clear(jcol,jlev+1) = sum_up
    604           flux%sw_dn_clear(jcol,jlev+1) = mu0 * sum_dn_dir + sum_dn_diff
     561          flux%sw_up_clear(jcol,jlev+1) = sum(flux_up_clear)
    605562          if (allocated(flux%sw_dn_direct_clear)) then
    606             flux%sw_dn_direct_clear(jcol,jlev+1) = mu0 * sum_dn_dir
     563            flux%sw_dn_direct_clear(jcol,jlev+1) = mu0 * sum(direct_dn_clear)
     564            flux%sw_dn_clear(jcol,jlev+1) &
     565                 &  = flux%sw_dn_direct_clear(jcol,jlev+1) + sum(flux_dn_clear)
     566          else
     567            flux%sw_dn_clear(jcol,jlev+1) = mu0 * sum(direct_dn_clear) &
     568                 &  + sum(flux_dn_clear)
    607569          end if
    608570        end if
     
    643605          end if
    644606        end if
     607
    645608      end do ! Final loop over levels
    646609     
Note: See TracChangeset for help on using the changeset viewer.