Ignore:
Timestamp:
Jun 10, 2024, 4:24:43 PM (8 months ago)
Author:
jbclement
Message:

PEM:

  • The PEM can now stop itself cleanly before the SLURM time limit for the job is reached and it continues the simulation with a new cycle.
  • Update of "jobPEM.slurm" in the deftank to guarantee enough memory space to run the job.
  • A few minor cleanups.

JBC

Location:
trunk/LMDZ.COMMON/libf/evolution
Files:
5 edited
1 moved

Legend:

Unmodified
Added
Removed
  • trunk/LMDZ.COMMON/libf/evolution/changelog.txt

    r3355 r3363  
    348348The launching script can now operate a relaunch from any already computed PCM/PEM run. The starting point is asked to the user interactively on the terminal.
    349349Some features might not work very well yet since not every situation has been tested.
     350
     351== 10/06/2024 == JBC
     352- The PEM can now stop itself cleanly before the SLURM time limit for the job is reached and it continues the simulation with a new cycle.
     353- Update of "jobPEM.slurm" in the deftank to guarantee enough memory space to run the job.
     354- A few minor cleanups.
  • trunk/LMDZ.COMMON/libf/evolution/deftank/jobPEM.slurm

    r3354 r3363  
    55#SBATCH --constraint=GENOA
    66### Number of Nodes to use
    7 #SBATCH --nodes=1
     7#SBATCH --nodes=4 # to run with enough memory
    88#SBATCH --ntasks-per-node=1
    99#SBATCH --cpus-per-task=1
     
    6161fi
    6262
    63 ./launchPEM.sh new
     63# Launch the next cycle
     64#if [ "$(awk 'END{print $NF}' info_PEM.txt)" -eq 7 ]; then
     65#    read i_myear n_myear convert_years iPCM iPEM nPCM nPCM_ini < info_PEM.txt
     66#    ./launchPEM.sh cont # Continue the PEM run if it stopped because of job time limit
     67#else
     68    ./launchPEM.sh new
     69#fi
  • trunk/LMDZ.COMMON/libf/evolution/deftank/launchPEM.sh

    r3355 r3363  
    6464            errlaunch
    6565        fi
     66        echo "The relaunch is initialized with a specific previous successful run."
    6667        while true; do
    6768            echo "Do you want to relaunch from a 'PCM' or 'PEM' run?"
     
    106107            relaunchPEM
    107108        fi
     109
     110    # Continuing the PEM run
     111    elif [ $1 = "cont" ]; then
     112        exec >> log_launchPEM.txt 2>&1
     113        echo
     114        echo "This is a continuation of the previous PEM run."
     115        date
     116        submitPEM
     117
     118    # Default case: error
    108119    else
    109120        echo "Error: given argument '$1' for the launching script is unknown!"
  • trunk/LMDZ.COMMON/libf/evolution/pem.F90

    r3339 r3363  
    228228
    229229! Some variables for the PEM run
    230 real, parameter :: year_step = 1 ! timestep for the pem
    231 integer         :: year_iter     ! number of iteration
    232 integer         :: year_iter_max ! maximum number of iterations before stopping
    233 integer         :: i_myear       ! Global number of Martian years of the chained simulations
    234 integer         :: n_myear       ! Maximum number of Martian years of the chained simulations
    235 real            :: timestep      ! timestep [s]
     230real, parameter :: year_step = 1   ! Timestep for the pem
     231integer         :: year_iter       ! Number of iteration
     232integer         :: year_iter_max   ! Maximum number of iterations before stopping
     233integer         :: i_myear         ! Global number of Martian years of the chained simulations
     234integer         :: n_myear         ! Maximum number of Martian years of the chained simulations
     235real            :: timestep        ! Timestep [s]
     236character(20)   :: job_id          ! Job id provided as argument passed on the command line when the program was invoked
     237integer(kind=8) :: cr              ! Number of clock ticks per second (count rate)
     238integer(kind=8) :: c1, c2          ! Counts of processor clock
     239character(100)  :: chtimelimit     ! Time limit for the PEM job outputted by the SLURM command
     240real            :: timelimit       ! Time limit for the PEM job in seconds
     241real, parameter :: antetime = 1200 ! Anticipation time to prevent reaching the time limit: 1200 s = 20 min by default
     242integer         :: cstat, days, hours, minutes, seconds
     243character(1)    :: sep
    236244
    237245#ifdef CPP_STD
     
    274282integer :: i, l, ig, nnq, t, islope, ig_loop, islope_loop, isoil, icap
    275283
     284! Elapsed time with system clock
     285call system_clock(count_rate = cr)
     286call system_clock(c1)
     287timelimit = 86400 ! 86400 seconds = 24 h by default
     288if (command_argument_count() > 0) then
     289    ! Read the job id passed as argument to the program
     290    call get_command_argument(1,job_id)
     291    ! Execute the system command
     292    call execute_command_line('squeue -j '//trim(job_id)//' -h --Format TimeLimit > tmp_cmdout.txt',cmdstat = cstat)
     293    if (cstat > 0) then
     294        error stop 'pem: command execution failed!'
     295    else if (cstat < 0) then
     296        error stop 'pem: command execution not supported!'
     297    endif
     298    ! Read the output
     299    open(1,file = 'tmp_cmdout.txt',status = 'old')
     300    read(1,'(a)') chtimelimit
     301    close(1)
     302    chtimelimit = trim(chtimelimit)
     303    call execute_command_line('rm tmp_cmdout.txt',cmdstat = cstat)
     304    if (cstat > 0) then
     305        error stop 'pem: command execution failed!'
     306    else if (cstat < 0) then
     307        error stop 'pem: command execution not supported!'
     308    endif
     309    ! Assume 'chtimelimit' format is "D-HH:MM:SS" or "HH:MM:SS"
     310    if (index(chtimelimit,'-') > 0) then
     311        read(chtimelimit, '(i1,a1,i2,a1,i2,a1,i2)') days, sep, hours, sep, minutes, sep, seconds
     312        timelimit = days*86400 + hours*3600 + minutes*60 + seconds
     313    else
     314        read(chtimelimit,'(i2,a1,i2,a1,i2)') hours, sep, minutes, sep, seconds
     315        timelimit = hours*3600 + minutes*60 + seconds
     316    endif
     317endif
     318
    276319! Parallel variables
    277320#ifndef CPP_STD
     
    310353
    311354    therestart1D = .false. ! Default value
    312     inquire(file = 'start1D_evol.txt',exist = therestart1D)
     355    inquire(file = 'start1D.txt',exist = therestart1D)
    313356    if (.not. therestart1D) then
    314         write(*,*) 'There is no "start1D_evol.txt" file!'
     357        write(*,*) 'There is no "start1D.txt" file!'
    315358        error stop 'Initialization cannot be done for the 1D PEM.'
    316359    endif
     
    322365    endif
    323366
    324     call init_testphys1d('start1D_evol.txt','startfi.nc',therestart1D,therestartfi,ngrid,nlayer,610.,nq,q, &
    325                          time_0,ps(1),ucov,vcov,teta,ndt,ptif,pks,dtphys,zqsat,dq,dqdyn,day0,day,gru,grv,w,     &
     367    call init_testphys1d('start1D.txt','startfi.nc',therestart1D,therestartfi,ngrid,nlayer,610.,nq,q,        &
     368                         time_0,ps(1),ucov,vcov,teta,ndt,ptif,pks,dtphys,zqsat,dq,dqdyn,day0,day,gru,grv,w,  &
    326369                         play,plev,latitude,longitude,cell_area,atm_wat_profile,atm_wat_tau)
    327370    ps(2) = ps(1)
     
    505548write(*,*) "Downloading data Y1..."
    506549call read_data_PCM("data_PCM_Y1.nc",timelen,iim,jjm_value,ngrid,nslope,vmr_co2_PCM,ps_timeseries,min_co2_ice(:,:,1),min_h2o_ice(:,:,1), &
    507                    tsurf_avg_yr1,tsoil_ave,tsurf_PCM_timeseries,tsoil_PCM_timeseries,q_co2_PEM_phys,q_h2o_PEM_phys,                      &
     550                   tsurf_avg_yr1,tsoil_ave,tsurf_PCM_timeseries,tsoil_PCM_timeseries,q_co2_PEM_phys,q_h2o_PEM_phys,                     &
    508551                   co2_ice_PCM,watersurf_density_ave,watersoil_density_timeseries)
    509552write(*,*) "Downloading data Y1 done!"
     
    9581001!    II_g Checking the stopping criterion
    9591002!------------------------
     1003    call system_clock(c2)
     1004    if (timelimit - real((c2 - c1)/cr) <= antetime) stopPEM = 7
    9601005    call stopping_crit_h2o_ice(cell_area,h2oice_ini_surf,ini_h2oice_sublim,h2o_ice,stopPEM,ngrid)
    9611006    call stopping_crit_co2(cell_area,co2ice_ini_surf,ini_co2ice_sublim,co2_ice,stopPEM,ngrid,global_avg_press_PCM,global_avg_press_new,nslope)
     
    9811026            case(6)
    9821027                write(*,*) "STOPPING because maximum number of Martian years to be simulated is reached:", stopPEM
     1028            case(7)
     1029                write(*,*) "STOPPING because the time limit for the PEM job will be reached soon:", stopPEM
    9831030            case default
    9841031                write(*,*) "STOPPING with unknown stopping criterion code:", stopPEM
     
    11041151    write(*,*) "restart.nc has been written"
    11051152#else
    1106     call writerestart1D('restart1D_evol.txt',ps(1),tsurf(1,:),nlayer,size(tsurf,2),teta,ucov,vcov,nq,noms,qsurf(1,:,:),q)
    1107     write(*,*) "restart1D_evol.txt has been written"
     1153    call writerestart1D('restart1D.txt',ps(1),tsurf(1,:),nlayer,size(tsurf,2),teta,ucov,vcov,nq,noms,qsurf(1,:,:),q)
     1154    write(*,*) "restart1D.txt has been written"
    11081155#endif
    11091156
  • trunk/LMDZ.COMMON/libf/evolution/read_data_PCM_mod.F90

    r3210 r3363  
    8181call error_msg(NF90_OPEN(filename,NF90_NOWRITE,fID),"open",filename)
    8282
    83 ! Dowload the data from the file
     83! Download the data from the file
    8484call get_var3("ps",ps_PCM)
    8585write(*,*) "Data for surface pressure downloaded!"
     
    172172#endif
    173173endif
     174
     175! Close the NetCDF file
     176write(*,*) "Closing "//filename//"..."
     177call error_msg(nf90_close(fID),"close",filename)
    174178
    175179! Compute the minimum over the year for each point
     
    336340    case('inq');   msg="Field <"//trim(nam)//"> is missing"
    337341    case('get');   msg="Reading failed for <"//trim(nam)//">"
     342    case('put');   msg="Writing failed for <"//trim(nam)//">"
    338343    case('open');  msg="File opening failed for <"//trim(nam)//">"
    339344    case('close'); msg="File closing failed for <"//trim(nam)//">"
Note: See TracChangeset for help on using the changeset viewer.