Changeset 3403 for trunk/LMDZ.COMMON


Ignore:
Timestamp:
Aug 9, 2024, 6:09:15 PM (3 months ago)
Author:
jbclement
Message:

PEM:
Addition in the launching script of the possibility to submit a job with PBS/TORQUE + Modification to make the time limit detection in "pem.F90" work with PBS/TORQUE + Update of the headers of .job files.
JBC

Location:
trunk/LMDZ.COMMON/libf/evolution
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/LMDZ.COMMON/libf/evolution/changelog.txt

    r3399 r3403  
    401401== 24/07/2024 == JBC
    402402Small correction for the 1D related to r3386 and r3369 + Making the computation of maximum number of iterations due to orbital variations more robust.
     403
     404== 09/08/2024 == JBC
     405Addition in the launching script of the possibility to submit a job with PBS/TORQUE + Modification to make the time limit detection in "pem.F90" work with PBS/TORQUE + Update of the headers of .job files.
  • trunk/LMDZ.COMMON/libf/evolution/deftank/PCMrun.job

    r3391 r3403  
    1010#SBATCH --threads-per-core=1 # --hint=nomultithread
    1111###SBATCH --exclusive
    12 #SBATCH --output=jobPCM_%A.out
     12#SBATCH --output=jobPCM_%j.out
    1313#SBATCH --time=12:00:00
    1414
     
    1717source ../trunk/LMDZ.COMMON/arch.env
    1818
    19 # Number of threads to use (must be the same as '#MSUB -c' above)
    20 export OMP_NUM_THREADS=4
     19# Number of threads to use (must be the same as "#SBATCH --cpus-per-task=" above)
     20export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
    2121export OMP_STACKSIZE=400M
    2222
     
    2424exePCM="gcm_64x48x32_phymars_para.e"
    2525########################################################################
     26
     27
     28ulimit -s unlimited
    2629
    2730# Running the PCM
  • trunk/LMDZ.COMMON/libf/evolution/deftank/PEMrun.job

    r3391 r3403  
    55#SBATCH --constraint=GENOA
    66### Number of Nodes to use
    7 #SBATCH --nodes=4 # to run with enough memory
     7#SBATCH --nodes=1
    88#SBATCH --ntasks-per-node=1
    99#SBATCH --cpus-per-task=1
    10 #SBATCH --threads-per-core=1 # --hint=nomultithread
    1110###SBATCH --exclusive
    12 #SBATCH --output=jobPEM_%A.out
     11#SBATCH --output=jobPEM_%j.out
    1312#SBATCH --time=24:00:00
    1413
     
    2423########################################################################
    2524
     25
     26ulimit -s unlimited
    2627
    2728# Reshaping PCM data with XIOS
  • trunk/LMDZ.COMMON/libf/evolution/deftank/README

    r3394 r3403  
    1717
    1818# PCMrun.job:
    19   Bash script file to run the PCM with a SLURM job. The name of the PCM executable file should be adapted. The header corresponds to the ADASTRA supercomputer and should be changed for other machines. In case of 1D, the SBATCH headers are omitted.
     19  Bash script file to submit a PCM job (with SLURM or PBS/TORQUE). The name of the PCM executable file should be adapted. The headers correspond to the ADASTRA supercomputer and should be changed for other machines and job schedulers. In case of 1D, the headers are naturally omitted.
    2020  The path to source the arch file should be adapted to the machine.
    2121  The execution line should also be adapted according to the set-up.
    2222
    2323# PEMrun.job:
    24   Bash script file to run the PEM with a SLURM job. The name of the PEM executable file and Reshaping executable file should be adapted. The header corresponds to the ADASTRA supercomputer and should be changed for other machines. In case of 1D, the SBATCH headers are omitted.
     24  Bash script file to submit a PEM job (with SLURM or PBS/TORQUE). The name of the PEM executable file and Reshaping executable file should be adapted. The headers correspond to the ADASTRA supercomputer and should be changed for other machines and job schedulers. In case of 1D, the headers are naturally omitted.
    2525  The path to source the arch file should be adapted to the machine.
    2626  The execution line should also be adapted according to the set-up.
  • trunk/LMDZ.COMMON/libf/evolution/deftank/lib_launchPEM.sh

    r3394 r3403  
    2424}
    2525
    26 # To check if SLURM is the job scheduler
    27 function is_slurm() {
    28     if ! command -v squeue &> /dev/null; then
    29         echo "Error: the job scheduler is not SLURM on $machine!"
    30         echo "You need to adapt the script to your case."
     26# To check what is the job scheduler
     27function job_scheduler() {
     28    if command -v squeue &> /dev/null; then
     29        echo "SLURM is installed on $machine."
     30        name_job="#SBATCH --job-name="
     31        kill_job="scancel"
     32        submit_job="sbatch --parsable"
     33        submit_dependjob="sbatch --parsable --dependency"
     34        sed -i 's/\$PBS_JOBID/\$SLURM_JOB_ID/g' PEMrun.job
     35    elif command -v qstat &> /dev/null; then
     36        echo "PBS/TORQUE is installed on $machine."
     37        name_job="#PBS -N "
     38        kill_job="qdel"
     39        submit_job="qsub"
     40        submit_dependjob="qsub -W depend"
     41        sed -i 's/\$SLURM_JOB_ID/\$PBS_JOBID/g' PEMrun.job
     42    else
     43        echo "Error: neither SLURM nor TORQUE/PBS is installed on $machine!"
     44        echo "You need to adapt the script to your job scheduler."
    3145        errlaunch
    3246    fi
     
    3953    LC_NUMERIC=en_US.UTF-8
    4054
    41     if [ $dim -ne 1 ]; then # 3D model works only with SLURM for now (because of chained job submission in the script + time limit management)
    42         is_slurm
    43     fi
    4455    if [ -v n_mars_years ] && [ ! -z "$n_mars_years" ]; then
    4556        if [ $n_mars_years -lt 1 ]; then
     
    107118    if [ ! -d "diags" ]; then
    108119        mkdir diags
     120    fi
     121    if [ $dim -ne 1 ]; then
     122        job_scheduler
    109123    fi
    110124}
     
    160174        else # 3D model
    161175            cp PCMrun.job PCMrun${iPCM}.job
    162             sed -i -E "s/(#SBATCH --job-name=[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPCM}/" PCMrun${iPCM}.job
     176            sed -i -E "s/($name_job[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPCM}/" PCMrun${iPCM}.job
    163177            sed -i "s/^k=[0-9]\+$/k=$(echo "3 - $nPCM_ini" | bc -l)/" PCMrun${iPCM}.job
    164             jobID=$(sbatch --parsable PCMrun${iPCM}.job)
     178            jobID=$(eval "$submit_job PCMrun${iPCM}.job")
    165179            # Create a file to cancel the dependent jobs of the cycle
    166180            echo "#!/bin/bash" > kill_launchPEM.sh
    167181            chmod +x kill_launchPEM.sh
    168             echo "scancel" $jobID >> kill_launchPEM.sh
     182            echo $kill_job $jobID >> kill_launchPEM.sh
    169183        fi
    170184        ((iPCM++))
     
    182196            else # 3D model
    183197                cp PCMrun.job PCMrun${iPCM}.job
    184                 sed -i -E "s/(#SBATCH --job-name=[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPCM}/" PCMrun${iPCM}.job
     198                sed -i -E "s/($name_job[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPCM}/" PCMrun${iPCM}.job
    185199                sed -i "s/^k=[0-9]\+$/k=$(echo "$i + 2 - $nPCM_ini" | bc -l)/" PCMrun${iPCM}.job
    186                 jobID=$(sbatch --parsable --dependency=afterok:${jobID} PCMrun${iPCM}.job)
    187                 echo "scancel" $jobID >> kill_launchPEM.sh
     200                jobID=$(eval "$submit_dependjob=afterok:${jobID} PCMrun${iPCM}.job")
     201                echo $kill_job $jobID >> kill_launchPEM.sh
    188202            fi
    189203            ((iPCM++))
     
    203217            ./PEMrun.job
    204218        else # 3D model
    205             sed -i -E "s/(#SBATCH --job-name=[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPEM}/" PEMrun.job
    206             jobID=$(sbatch --parsable PEMrun.job)
     219            sed -i -E "s/($name_job[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPEM}/" PEMrun.job
     220            jobID=$(eval "$submit_job PEMrun.job")
    207221            # Create a file to cancel the dependent jobs of the cycle
    208222            echo "#!/bin/bash" > kill_launchPEM.sh
    209223            chmod +x kill_launchPEM.sh
    210             echo "scancel" $jobID >> kill_launchPEM.sh
     224            echo $kill_job $jobID >> kill_launchPEM.sh
    211225        fi
    212226    else
     
    229243            ./PEMrun.job
    230244        else # 3D model
    231             sed -i -E "s/(#SBATCH --job-name=[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPEM}/" PEMrun.job
    232             jobID=$(sbatch --parsable --dependency=afterok:${jobID} PEMrun.job)
    233             echo "scancel" $jobID >> kill_launchPEM.sh
     245            sed -i -E "s/($name_job[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPEM}/" PEMrun.job
     246            jobID=$(eval "$submit_dependjob=afterok:${jobID} PEMrun.job")
     247            echo $kill_job $jobID >> kill_launchPEM.sh
    234248        fi
    235249    else
  • trunk/LMDZ.COMMON/libf/evolution/pem.F90

    r3399 r3403  
    293293    ! Execute the system command
    294294    call execute_command_line('squeue -j '//trim(job_id)//' -h --Format TimeLimit > tmp_cmdout.txt',cmdstat = cstat)
    295     if (cstat > 0) then
    296         error stop 'pem: command execution failed!'
    297     else if (cstat < 0) then
    298         error stop 'pem: command execution not supported!'
     295    if (cstat /= 0) then
     296        call execute_command_line('qstat -f '//trim(job_id)//' | grep "Walltime" | awk ''{print $3}'' > tmp_cmdout.txt', cmdstat = cstat)
     297        if (cstat > 0) then
     298            error stop 'pem: command execution failed!'
     299        else if (cstat < 0) then
     300            error stop 'pem: command execution not supported (neither SLURM nor PBS/TORQUE is installed)!'
     301        endif
    299302    endif
    300303    ! Read the output
     
    703706#endif
    704707
    705 if (evol_orbit_pem) then
    706     call orbit_param_criterion(i_myear,year_iter_max)
    707 else
    708     year_iter_max = Max_iter_pem
    709 endif
     708year_iter_max = Max_iter_pem
     709if (evol_orbit_pem) call orbit_param_criterion(i_myear,year_iter_max)
     710
    710711!-------------------------- END INITIALIZATION -------------------------
    711712
Note: See TracChangeset for help on using the changeset viewer.