Context Navigation

← Previous Change
Next Change →

radiation_tripleclouds_sw.F90

Timestamp:

May 18, 2024, 8:07:34 PM (7 weeks ago)

Author:

idelkadi

Message:

The explicit loops with the "omp simd reduction" directive, added in svn revision 4938 to work around the slowness of the intrinsic "sum" command, led to non-reproducible results in MPI and mixed MPI-OMP modes when using the Tripleclouds and Mcica solvers.
We revert to the svn4848 versions of the radiation_tripleclouds_*w.F90 and radiation_mcica_*w.F90 routines.

File:

: 1 edited

LMDZ6/trunk/libf/phylmd/ecrad/radiation/radiation_tripleclouds_sw.F90 (modified) (7 diffs)

Legend:

: Unmodified
: Added
: Removed

LMDZ6/trunk/libf/phylmd/ecrad/radiation/radiation_tripleclouds_sw.F90

-                      r4853
+                      r4946
     ! Gas and aerosol optical depth, single-scattering albedo and
     ! asymmetry factor at each shortwave g-point
+    real(jprb), intent(in), dimension(config%n_g_sw,nlev,istartcol:iendcol) &
+         &  :: od, ssa, g
+!    real(jprb), intent(in), dimension(istartcol:iendcol,nlev,config%n_g_sw) :: &
+    real(jprb), intent(in), dimension(config%n_g_sw,nlev,istartcol:iendcol) :: &
+         &  od, ssa, g
     ! Cloud and precipitation optical depth, single-scattering albedo and
     ! asymmetry factor in each shortwave band
     real(jprb), intent(in), dimension(config%n_bands_sw,nlev,istartcol:iendcol) &
          &  :: od_cloud, ssa_cloud, g_cloud
+    real(jprb), intent(in), dimension(config%n_bands_sw,nlev,istartcol:iendcol) :: &
+         &  od_cloud, ssa_cloud, g_cloud
     ! Optical depth, single scattering albedo and asymmetry factor in
 …
     ! flux into a plane perpendicular to the incoming radiation at
     ! top-of-atmosphere in each of the shortwave g points
     real(jprb), intent(in), dimension(config%n_g_sw,istartcol:iendcol) &
          &  :: albedo_direct, albedo_diffuse, incoming_sw
+    real(jprb), intent(in), dimension(config%n_g_sw,istartcol:iendcol) :: &
+         &  albedo_direct, albedo_diffuse, incoming_sw
     ! Output
 …
     real(jprb) :: scat_od, scat_od_cloud
-    ! Temporaries to speed up summations
-    real(jprb) :: sum_dn_diff, sum_dn_dir, sum_up
-    ! Local cosine of solar zenith angle
     real(jprb) :: mu0
 …
       end if
+      ! Store the TOA broadband fluxes, noting that there is no
+      ! diffuse downwelling at TOA. The intrinsic "sum" command has
+      ! been found to be very slow; better performance is found on
+      ! x86-64 architecture with explicit loops and the "omp simd
+      ! reduction" directive.
+      sum_up     = 0.0_jprb
+      sum_dn_dir = 0.0_jprb
+      do jreg = 1,nregions
+        !$omp simd reduction(+:sum_up, sum_dn_dir)
+        do jg = 1,ng
+          sum_up     = sum_up     + flux_up(jg,jreg)
+          sum_dn_dir = sum_dn_dir + direct_dn(jg,jreg)
+        end do
+      end do
+      flux%sw_up(jcol,1) = sum_up
+      flux%sw_dn(jcol,1) = mu0 * sum_dn_dir
+      ! Store the TOA broadband fluxes
+      flux%sw_up(jcol,1) = sum(sum(flux_up,1))
+      flux%sw_dn(jcol,1) = mu0 * sum(sum(direct_dn,1))
       if (allocated(flux%sw_dn_direct)) then
         flux%sw_dn_direct(jcol,1) = flux%sw_dn(jcol,1)
       end if
       if (config%do_clear) then
+        sum_up     = 0.0_jprb
+        sum_dn_dir = 0.0_jprb
+        !$omp simd reduction(+:sum_up, sum_dn_dir)
+        do jg = 1,ng
+          sum_up     = sum_up     + flux_up_clear(jg)
+          sum_dn_dir = sum_dn_dir + direct_dn_clear(jg)
+        end do
+        flux%sw_up_clear(jcol,1) = sum_up
+        flux%sw_dn_clear(jcol,1) = mu0 * sum_dn_dir
+        flux%sw_up_clear(jcol,1) = sum(flux_up_clear)
+        flux%sw_dn_clear(jcol,1) = mu0 * sum(direct_dn_clear)
         if (allocated(flux%sw_dn_direct_clear)) then
           flux%sw_dn_direct_clear(jcol,1) = flux%sw_dn_clear(jcol,1)
 …
              &           config%i_spec_from_reordered_g_sw, &
              &           flux%sw_dn_band(:,jcol,1))
+        flux%sw_dn_band(:,jcol,1) = mu0 * flux%sw_dn_band(:,jcol,1)
+        flux%sw_dn_band(:,jcol,1) = &
+             &  mu0 * flux%sw_dn_band(:,jcol,1)
         if (allocated(flux%sw_dn_direct_band)) then
           flux%sw_dn_direct_band(:,jcol,1) = flux%sw_dn_band(:,jcol,1)
 …
                ! nothing to do
+        ! Store the broadband fluxes. The intrinsic "sum" command has
+        ! been found to be very slow; better performance is found on
+        ! x86-64 architecture with explicit loops and the "omp simd
+        ! reduction" directive.
+        sum_up      = 0.0_jprb
+        sum_dn_dir  = 0.0_jprb
+        sum_dn_diff = 0.0_jprb
+        do jreg = 1,nregions
+          !$omp simd reduction(+:sum_up, sum_dn_diff, sum_dn_dir)
+          do jg = 1,ng
+            sum_up      = sum_up      + flux_up(jg,jreg)
+            sum_dn_diff = sum_dn_diff + flux_dn(jg,jreg)
+            sum_dn_dir  = sum_dn_dir  + direct_dn(jg,jreg)
+          end do
+        end do
+        flux%sw_up(jcol,jlev+1) = sum_up
+        flux%sw_dn(jcol,jlev+1) = mu0 * sum_dn_dir + sum_dn_diff
+        ! Store the broadband fluxes
+        flux%sw_up(jcol,jlev+1) = sum(sum(flux_up,1))
         if (allocated(flux%sw_dn_direct)) then
+          flux%sw_dn_direct(jcol,jlev+1) = mu0 * sum_dn_dir
+          flux%sw_dn_direct(jcol,jlev+1) = mu0 * sum(sum(direct_dn,1))
+          flux%sw_dn(jcol,jlev+1) &
+               &  = flux%sw_dn_direct(jcol,jlev+1) + sum(sum(flux_dn,1))
+        else
+          flux%sw_dn(jcol,jlev+1) = mu0 * sum(sum(direct_dn,1)) + sum(sum(flux_dn,1))
         end if
         if (config%do_clear) then
+          sum_up      = 0.0_jprb
+          sum_dn_dir  = 0.0_jprb
+          sum_dn_diff = 0.0_jprb
+          !$omp simd reduction(+:sum_up, sum_dn_diff, sum_dn_dir)
+          do jg = 1,ng
+            sum_up      = sum_up      + flux_up_clear(jg)
+            sum_dn_diff = sum_dn_diff + flux_dn_clear(jg)
+            sum_dn_dir  = sum_dn_dir  + direct_dn_clear(jg)
+          end do
+          flux%sw_up_clear(jcol,jlev+1) = sum_up
+          flux%sw_dn_clear(jcol,jlev+1) = mu0 * sum_dn_dir + sum_dn_diff
+          flux%sw_up_clear(jcol,jlev+1) = sum(flux_up_clear)
           if (allocated(flux%sw_dn_direct_clear)) then
+            flux%sw_dn_direct_clear(jcol,jlev+1) = mu0 * sum_dn_dir
+            flux%sw_dn_direct_clear(jcol,jlev+1) = mu0 * sum(direct_dn_clear)
+            flux%sw_dn_clear(jcol,jlev+1) &
+                 &  = flux%sw_dn_direct_clear(jcol,jlev+1) + sum(flux_dn_clear)
+          else
+            flux%sw_dn_clear(jcol,jlev+1) = mu0 * sum(direct_dn_clear) &
+                 &  + sum(flux_dn_clear)
           end if
         end if
 …
           end if
         end if
       end do ! Final loop over levels

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 4946 for LMDZ6/trunk/libf/phylmd/ecrad/radiation/radiation_tripleclouds_sw.F90

Legend:

LMDZ6/trunk/libf/phylmd/ecrad/radiation/radiation_tripleclouds_sw.F90

Download in other formats: