Ignore:
Timestamp:
May 18, 2024, 8:07:34 PM (2 weeks ago)
Author:
idelkadi
Message:

The addition of explicit loops with the "omp simd reduction" directive to solve the slowness problem linked to the "sum" command (svn4938 revesion) led to non-reproducibility in MPI and mixed MPI-OMP modes in the case of Tripleclouds and Mcica solvers.
We return to the svn4848 versions of the radiation_tripleclouds_*w.F90 and radiation_mcica_*w.F90 routines.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • LMDZ6/trunk/libf/phylmd/ecrad/radiation/radiation_mcica_sw.F90

    r4853 r4946  
    1717!   2017-04-22  R. Hogan  Store surface fluxes at all g-points
    1818!   2017-10-23  R. Hogan  Renamed single-character variables
    19 
    20 #include "ecrad_config.h"
    2119
    2220module radiation_mcica_sw
     
    121119    ! Total cloud cover output from the cloud generator
    122120    real(jprb) :: total_cloud_cover
    123 
    124     ! Temporary storage for more efficient summation
    125 #ifdef DWD_REDUCTION_OPTIMIZATIONS
    126     real(jprb), dimension(nlev+1,3) :: sum_aux
    127 #else
    128     real(jprb) :: sum_up, sum_dn_diff, sum_dn_dir
    129 #endif
    130121
    131122    ! Number of g points
     
    184175       
    185176        ! Sum over g-points to compute and save clear-sky broadband
    186         ! fluxes. Note that the built-in "sum" function is very slow,
    187         ! and before being replaced by the alternatives below
    188         ! accounted for around 40% of the total cost of this routine.
    189 #ifdef DWD_REDUCTION_OPTIMIZATIONS
    190         ! Optimized summation for the NEC architecture
    191         sum_aux(:,:) = 0.0_jprb
    192         do jg = 1,ng
    193           do jlev = 1,nlev+1
    194             sum_aux(jlev,1) = sum_aux(jlev,1) + flux_up(jg,jlev)
    195             sum_aux(jlev,2) = sum_aux(jlev,2) + flux_dn_direct(jg,jlev)
    196             sum_aux(jlev,3) = sum_aux(jlev,3) + flux_dn_diffuse(jg,jlev)
    197           end do
    198         end do
    199         flux%sw_up_clear(jcol,:) = sum_aux(:,1)
    200         flux%sw_dn_clear(jcol,:) = sum_aux(:,2) + sum_aux(:,3)
     177        ! fluxes
     178        flux%sw_up_clear(jcol,:) = sum(flux_up,1)
    201179        if (allocated(flux%sw_dn_direct_clear)) then
    202           flux%sw_dn_direct_clear(jcol,:) = sum_aux(:,2)
     180          flux%sw_dn_direct_clear(jcol,:) &
     181               &  = sum(flux_dn_direct,1)
     182          flux%sw_dn_clear(jcol,:) = sum(flux_dn_diffuse,1) &
     183               &  + flux%sw_dn_direct_clear(jcol,:)
     184        else
     185          flux%sw_dn_clear(jcol,:) = sum(flux_dn_diffuse,1) &
     186               &  + sum(flux_dn_direct,1)
    203187        end if
    204 #else
    205         ! Optimized summation for the x86-64 architecture
    206         do jlev = 1,nlev+1
    207           sum_up      = 0.0_jprb
    208           sum_dn_diff = 0.0_jprb
    209           sum_dn_dir  = 0.0_jprb
    210           !$omp simd reduction(+:sum_up, sum_dn_diff, sum_dn_dir)
    211           do jg = 1,ng
    212             sum_up      = sum_up      + flux_up(jg,jlev)
    213             sum_dn_diff = sum_dn_diff + flux_dn_diffuse(jg,jlev)
    214             sum_dn_dir  = sum_dn_dir  + flux_dn_direct(jg,jlev)
    215           end do
    216           flux%sw_up_clear(jcol,jlev) = sum_up
    217           flux%sw_dn_clear(jcol,jlev) = sum_dn_diff + sum_dn_dir
    218           if (allocated(flux%sw_dn_direct_clear)) then
    219             flux%sw_dn_direct_clear(jcol,jlev) = sum_dn_dir
    220           end if
    221         end do
    222 #endif
    223        
    224188        ! Store spectral downwelling fluxes at surface
    225         do jg = 1,ng
    226           flux%sw_dn_diffuse_surf_clear_g(jg,jcol) = flux_dn_diffuse(jg,nlev+1)
    227           flux%sw_dn_direct_surf_clear_g(jg,jcol)  = flux_dn_direct(jg,nlev+1)
    228         end do
     189        flux%sw_dn_diffuse_surf_clear_g(:,jcol) = flux_dn_diffuse(:,nlev+1)
     190        flux%sw_dn_direct_surf_clear_g(:,jcol)  = flux_dn_direct(:,nlev+1)
    229191
    230192        ! Do cloudy-sky calculation
     
    287249            else
    288250              ! Clear-sky layer: copy over clear-sky values
    289               do jg = 1,ng
    290                 reflectance(jg,jlev) = ref_clear(jg,jlev)
    291                 transmittance(jg,jlev) = trans_clear(jg,jlev)
    292                 ref_dir(jg,jlev) = ref_dir_clear(jg,jlev)
    293                 trans_dir_diff(jg,jlev) = trans_dir_diff_clear(jg,jlev)
    294                 trans_dir_dir(jg,jlev) = trans_dir_dir_clear(jg,jlev)
    295               end do
     251              reflectance(:,jlev) = ref_clear(:,jlev)
     252              transmittance(:,jlev) = trans_clear(:,jlev)
     253              ref_dir(:,jlev) = ref_dir_clear(:,jlev)
     254              trans_dir_diff(:,jlev) = trans_dir_diff_clear(:,jlev)
     255              trans_dir_dir(:,jlev) = trans_dir_dir_clear(:,jlev)
    296256            end if
    297257          end do
     
    304264         
    305265          ! Store overcast broadband fluxes
    306 #ifdef DWD_REDUCTION_OPTIMIZATIONS
    307           sum_aux(:,:) = 0.0_jprb
    308           do jg = 1,ng
    309             do jlev = 1,nlev+1
    310               sum_aux(jlev,1) = sum_aux(jlev,1) + flux_up(jg,jlev)
    311               sum_aux(jlev,2) = sum_aux(jlev,2) + flux_dn_direct(jg,jlev)
    312               sum_aux(jlev,3) = sum_aux(jlev,3) + flux_dn_diffuse(jg,jlev)
    313             end do
    314           end do
    315           flux%sw_up(jcol,:) = sum_aux(:,1)
    316           flux%sw_dn(jcol,:) = sum_aux(:,2) + sum_aux(:,3)
     266          flux%sw_up(jcol,:) = sum(flux_up,1)
    317267          if (allocated(flux%sw_dn_direct)) then
    318             flux%sw_dn_direct(jcol,:) = sum_aux(:,2)
     268            flux%sw_dn_direct(jcol,:) = sum(flux_dn_direct,1)
     269            flux%sw_dn(jcol,:) = sum(flux_dn_diffuse,1) &
     270                 &  + flux%sw_dn_direct(jcol,:)
     271          else
     272            flux%sw_dn(jcol,:) = sum(flux_dn_diffuse,1) &
     273                 &  + sum(flux_dn_direct,1)
    319274          end if
    320 #else
    321           do jlev = 1,nlev+1
    322             sum_up      = 0.0_jprb
    323             sum_dn_diff = 0.0_jprb
    324             sum_dn_dir  = 0.0_jprb
    325             !$omp simd reduction(+:sum_up, sum_dn_diff, sum_dn_dir)
    326             do jg = 1,ng
    327               sum_up      = sum_up      + flux_up(jg,jlev)
    328               sum_dn_diff = sum_dn_diff + flux_dn_diffuse(jg,jlev)
    329               sum_dn_dir  = sum_dn_dir  + flux_dn_direct(jg,jlev)
    330             end do
    331             flux%sw_up(jcol,jlev) = sum_up
    332             flux%sw_dn(jcol,jlev) = sum_dn_diff + sum_dn_dir
    333             if (allocated(flux%sw_dn_direct)) then
    334               flux%sw_dn_direct(jcol,jlev) = sum_dn_dir
    335             end if
    336           end do
    337 #endif
    338          
     275
    339276          ! Cloudy flux profiles currently assume completely overcast
    340277          ! skies; perform weighted average with clear-sky profile
    341           do jlev = 1, nlev+1
    342             flux%sw_up(jcol,jlev) =  total_cloud_cover *flux%sw_up(jcol,jlev) &
    343                  &     + (1.0_jprb - total_cloud_cover)*flux%sw_up_clear(jcol,jlev)
    344             flux%sw_dn(jcol,jlev) =  total_cloud_cover *flux%sw_dn(jcol,jlev) &
    345                  &     + (1.0_jprb - total_cloud_cover)*flux%sw_dn_clear(jcol,jlev)
    346             if (allocated(flux%sw_dn_direct)) then
    347               flux%sw_dn_direct(jcol,jlev) = total_cloud_cover *flux%sw_dn_direct(jcol,jlev) &
    348                    &  + (1.0_jprb - total_cloud_cover)*flux%sw_dn_direct_clear(jcol,jlev)
    349             end if
    350           end do
     278          flux%sw_up(jcol,:) =  total_cloud_cover *flux%sw_up(jcol,:) &
     279               &  + (1.0_jprb - total_cloud_cover)*flux%sw_up_clear(jcol,:)
     280          flux%sw_dn(jcol,:) =  total_cloud_cover *flux%sw_dn(jcol,:) &
     281               &  + (1.0_jprb - total_cloud_cover)*flux%sw_dn_clear(jcol,:)
     282          if (allocated(flux%sw_dn_direct)) then
     283            flux%sw_dn_direct(jcol,:) = total_cloud_cover *flux%sw_dn_direct(jcol,:) &
     284                 &  + (1.0_jprb - total_cloud_cover)*flux%sw_dn_direct_clear(jcol,:)
     285          end if
    351286          ! Likewise for surface spectral fluxes
    352           do jg = 1,ng
    353             flux%sw_dn_diffuse_surf_g(jg,jcol) = flux_dn_diffuse(jg,nlev+1)
    354             flux%sw_dn_direct_surf_g(jg,jcol)  = flux_dn_direct(jg,nlev+1)
    355             flux%sw_dn_diffuse_surf_g(jg,jcol) = total_cloud_cover *flux%sw_dn_diffuse_surf_g(jg,jcol) &
    356                  &                 + (1.0_jprb - total_cloud_cover)*flux%sw_dn_diffuse_surf_clear_g(jg,jcol)
    357             flux%sw_dn_direct_surf_g(jg,jcol)  = total_cloud_cover *flux%sw_dn_direct_surf_g(jg,jcol) &
    358                  &                 + (1.0_jprb - total_cloud_cover)*flux%sw_dn_direct_surf_clear_g(jg,jcol)
    359           end do
    360 
     287          flux%sw_dn_diffuse_surf_g(:,jcol) = flux_dn_diffuse(:,nlev+1)
     288          flux%sw_dn_direct_surf_g(:,jcol)  = flux_dn_direct(:,nlev+1)
     289          flux%sw_dn_diffuse_surf_g(:,jcol) = total_cloud_cover *flux%sw_dn_diffuse_surf_g(:,jcol) &
     290               &     + (1.0_jprb - total_cloud_cover)*flux%sw_dn_diffuse_surf_clear_g(:,jcol)
     291          flux%sw_dn_direct_surf_g(:,jcol) = total_cloud_cover *flux%sw_dn_direct_surf_g(:,jcol) &
     292               &     + (1.0_jprb - total_cloud_cover)*flux%sw_dn_direct_surf_clear_g(:,jcol)
     293         
    361294        else
    362295          ! No cloud in profile and clear-sky fluxes already
    363296          ! calculated: copy them over
    364           do jlev = 1, nlev+1
    365             flux%sw_up(jcol,jlev) = flux%sw_up_clear(jcol,jlev)
    366             flux%sw_dn(jcol,jlev) = flux%sw_dn_clear(jcol,jlev)
    367             if (allocated(flux%sw_dn_direct)) then
    368               flux%sw_dn_direct(jcol,jlev) = flux%sw_dn_direct_clear(jcol,jlev)
    369             end if
    370           end do
    371           do jg = 1,ng
    372             flux%sw_dn_diffuse_surf_g(jg,jcol) = flux%sw_dn_diffuse_surf_clear_g(jg,jcol)
    373             flux%sw_dn_direct_surf_g(jg,jcol)  = flux%sw_dn_direct_surf_clear_g(jg,jcol)
    374           end do
     297          flux%sw_up(jcol,:) = flux%sw_up_clear(jcol,:)
     298          flux%sw_dn(jcol,:) = flux%sw_dn_clear(jcol,:)
     299          if (allocated(flux%sw_dn_direct)) then
     300            flux%sw_dn_direct(jcol,:) = flux%sw_dn_direct_clear(jcol,:)
     301          end if
     302          flux%sw_dn_diffuse_surf_g(:,jcol) = flux%sw_dn_diffuse_surf_clear_g(:,jcol)
     303          flux%sw_dn_direct_surf_g(:,jcol)  = flux%sw_dn_direct_surf_clear_g(:,jcol)
    375304
    376305        end if ! Cloud is present in profile
     
    378307      else
    379308        ! Set fluxes to zero if sun is below the horizon
    380         do jlev = 1, nlev+1
    381           flux%sw_up(jcol,jlev) = 0.0_jprb
    382           flux%sw_dn(jcol,jlev) = 0.0_jprb
    383           if (allocated(flux%sw_dn_direct)) then
    384             flux%sw_dn_direct(jcol,jlev) = 0.0_jprb
    385           end if
    386           flux%sw_up_clear(jcol,jlev) = 0.0_jprb
    387           flux%sw_dn_clear(jcol,jlev) = 0.0_jprb
    388           if (allocated(flux%sw_dn_direct_clear)) then
    389             flux%sw_dn_direct_clear(jcol,jlev) = 0.0_jprb
    390           end if
    391         end do
    392         do jg = 1,ng
    393           flux%sw_dn_diffuse_surf_g(jg,jcol) = 0.0_jprb
    394           flux%sw_dn_direct_surf_g(jg,jcol)  = 0.0_jprb
    395           flux%sw_dn_diffuse_surf_clear_g(jg,jcol) = 0.0_jprb
    396           flux%sw_dn_direct_surf_clear_g(jg,jcol)  = 0.0_jprb
    397         end do
     309        flux%sw_up(jcol,:) = 0.0_jprb
     310        flux%sw_dn(jcol,:) = 0.0_jprb
     311        if (allocated(flux%sw_dn_direct)) then
     312          flux%sw_dn_direct(jcol,:) = 0.0_jprb
     313        end if
     314        flux%sw_up_clear(jcol,:) = 0.0_jprb
     315        flux%sw_dn_clear(jcol,:) = 0.0_jprb
     316        if (allocated(flux%sw_dn_direct_clear)) then
     317          flux%sw_dn_direct_clear(jcol,:) = 0.0_jprb
     318        end if
     319        flux%sw_dn_diffuse_surf_g(:,jcol) = 0.0_jprb
     320        flux%sw_dn_direct_surf_g(:,jcol)  = 0.0_jprb
     321        flux%sw_dn_diffuse_surf_clear_g(:,jcol) = 0.0_jprb
     322        flux%sw_dn_direct_surf_clear_g(:,jcol)  = 0.0_jprb
    398323      end if ! Sun above horizon
    399324
Note: See TracChangeset for help on using the changeset viewer.