source: trunk/LMDZ.COMMON/libf/evolution/deftank/lib_launchPEM.sh @ 3495

Last change on this file since 3495 was 3495, checked in by jbclement, 2 weeks ago

PEM:
Modifications related to the launching script:

  • There is actually no difference in launching between the 1D and 3D models; what matters is how and where you want to execute. The user can now choose between two launching modes with the parameter "mode" (0 = "processing scripts"; any other value = "submitting jobs"). The former is typically used to run the scripts directly on a local machine, while the latter is used to submit jobs on a supercomputer;
  • The execution command line in the job scripts that should be modified by the user according to the set-up is now given as an argument at the beginning to be more identifiable and adaptable;
  • Making the job scripts more robust to detect a successful end.

JBC

  • Property svn:executable set to *
File size: 13.7 KB
RevLine 
#!/bin/bash
########################################################################
######## Library of bash functions for the PEM launching script ########
########################################################################

# To end the launching script
# Restores LC_NUMERIC from OLD_LC_NUMERIC, prints the date and a success
# message, then exits the current shell with status 0 (it never returns).
endlaunch() {
    # Restore the previous value of LC_NUMERIC
    LC_NUMERIC=$OLD_LC_NUMERIC

    date
    echo "Successful end of the launching script for the PEM simulation."
    exit 0
}
15
# To end the launching script with error
# Restores LC_NUMERIC from OLD_LC_NUMERIC, prints the date and an error
# message, then exits the current shell with status 1 (it never returns).
errlaunch() {
    # Restore the previous value of LC_NUMERIC
    LC_NUMERIC=$OLD_LC_NUMERIC

    date
    echo "End with error of the launching script for the PEM."
    exit 1
}
25
# To check what is the job scheduler
# Looks for "squeue" (SLURM) first, then "qstat" (PBS/TORQUE), and sets the
# global variables used by the submitting functions:
#   name_job         : job-name directive prefix searched in the job scripts
#   kill_job         : command to cancel a job
#   submit_job       : command to submit a job
#   submit_dependjob : command to submit a dependent job ("=afterok:<ID>" is appended by the callers)
# Calls errlaunch if none of the two schedulers is found.
job_scheduler() {
    if command -v squeue &> /dev/null; then
        echo "SLURM is installed on $machine."
        name_job="#SBATCH --job-name="
        kill_job="scancel"
        submit_job="sbatch --parsable"
        submit_dependjob="sbatch --parsable --dependency"
    elif command -v qstat &> /dev/null; then
        echo "PBS/TORQUE is installed on $machine."
        name_job="#PBS -N "
        kill_job="qdel"
        submit_job="qsub"
        submit_dependjob="qsub -W depend"
    else
        echo "Error: neither SLURM nor TORQUE/PBS is installed on $machine!"
        echo "You need to adapt the script to your job scheduler."
        errlaunch
    fi
}
46
# To check if everything necessary for the launching script is ok
# Reads the global variables n_mars_years / n_earth_years, nPCM_ini, nPCM, mode and dir.
# - Saves LC_NUMERIC into OLD_LC_NUMERIC and switches to a dot-decimal locale.
# - Checks the number of years to simulate and the numbers of PCM runs.
# - Checks that the job scripts and the definition/XML files are in the current directory.
# - Creates the output directories "out_PCM", "out_PEM", "starts" and "diags" if needed.
# - Detects the job scheduler when the mode is not 0 ("submitting jobs").
# Any failed check ends the script through errlaunch.
checklaunch() {
    local required_file

    # Save the current value of LC_NUMERIC and set it to a locale that uses a dot as the decimal separator
    OLD_LC_NUMERIC=$LC_NUMERIC
    LC_NUMERIC=en_US.UTF-8

    if [ -v n_mars_years ] && [ -n "$n_mars_years" ]; then
        # The format is checked first so that a non-integer value is rejected
        # instead of making the numeric test fail and slip through
        if ! [[ "$n_mars_years" =~ ^-?[0-9]+$ ]] || [ "$n_mars_years" -lt 1 ]; then
            echo "Error: the value of 'n_mars_years' must be >0!"
            errlaunch
        fi
    elif [ -v n_earth_years ] && [ -n "$n_earth_years" ]; then
        # NOTE(review): a non-integer value makes this test fail with a diagnostic
        # and is let through, as before - confirm whether decimal Earth years are intended
        if [ "$n_earth_years" -lt 1 ]; then
            echo "Error: the value of 'n_earth_years' must be >0!"
            errlaunch
        fi
    else
        echo "Error: no number of years to be simulated has been set!"
        errlaunch
    fi
    # Empty or non-integer values are caught by the format check before the comparison
    if ! [[ "$nPCM_ini" =~ ^-?[0-9]+$ ]] || [ "$nPCM_ini" -lt 2 ]; then
        echo "Error: the value of 'nPCM_ini' must be >1!"
        errlaunch
    fi
    if ! [[ "$nPCM" =~ ^-?[0-9]+$ ]] || [ "$nPCM" -lt 2 ]; then
        echo "Error: the value of 'nPCM' must be >1!"
        errlaunch
    fi

    # Files which must be present in the current directory
    for required_file in PCMrun.job PEMrun.job run_PCM.def run_PEM.def \
        context_lmdz_physics.xml field_def_physics_mars.xml \
        file_def_physics_mars.xml iodef.xml; do
        if [ ! -f "$required_file" ]; then
            echo "Error: file \"$required_file\" does not exist in $dir!"
            errlaunch
        fi
    done

    # Output directories (nothing is done for those which already exist)
    mkdir -p out_PCM out_PEM starts diags

    if [ "$mode" -ne 0 ]; then # Mode: submitting jobs
        job_scheduler
    fi
}
123
# To convert Earth years into Mars years
# Sets the global variables:
#   myear, eyear  : lengths of the Martian and Earth years in Earth days
#   convert_years : ratio myear/eyear rounded to 4 decimals
#   n_myear       : number of Martian years to simulate, taken from n_mars_years if it
#                   is set and non-empty, otherwise computed from n_earth_years
convertyears() {
    myear=686.9725      # Number of Earth days in Martian year
    eyear=365.256363004 # Number of days in Earth year
    convert_years=$(echo "$myear/$eyear" | bc -l)
    convert_years=$(printf "%.4f" "$convert_years") # Rounding to the 4th decimal to respect the precision of Martian year
    if [ -v n_mars_years ] && [ -n "$n_mars_years" ]; then
        n_myear=$n_mars_years
        echo "Number of years to be simulated: $n_myear Martian years."
    elif [ -v n_earth_years ] && [ -n "$n_earth_years" ]; then
        # Ceiling of n_earth_years/convert_years: the division must be done with decimals
        # (bc truncates it with its default scale of 0) and only the final result is
        # truncated to an integer
        n_myear=$(echo "scale=10; x = $n_earth_years/$convert_years + 0.999999; scale=0; x/1" | bc)
        echo "Number of years to be simulated: $n_earth_years Earth years = $n_myear Martian years."
    fi
}
138
# To initialize the launching script
# Computes the number of Martian years (convertyears), resets the counters
# i_myear, iPEM and iPCM, saves the initial start files into "starts/" and
# creates "info_PEM.txt" with the line:
#   i_myear n_myear convert_years iPCM iPEM nPCM nPCM_ini
initlaunch() {
    echo "This is a chained simulation for PEM and PCM runs in $dir on $machine by $user."
    convertyears
    i_myear=0
    iPEM=1
    iPCM=1
    cp startfi.nc starts/
    if [ -f "start.nc" ]; then
        cp start.nc starts/
    elif [ -f "start1D.txt" ]; then # Start file of the 1D model (same name as the one restored at relaunch)
        cp start1D.txt starts/
    fi
    if [ -f "startpem.nc" ]; then
        cp startpem.nc starts/
    fi

    # Create a file to manage years of the chained simulation and store some info from the PEM runs
    printf '%s %s %s %s %s %s %s\n' "$i_myear" "$n_myear" "$convert_years" "$iPCM" "$iPEM" "$nPCM" "$nPCM_ini" > info_PEM.txt
}
159
# To submit the PCM runs
# arg1: launching mode (0 = "processing scripts"; any other value = "submitting jobs")
# arg2: number of PCM runs to launch
# arg3: local number of the PCM run from which to start (optional)
# Reads and updates the global counters iPCM and i_myear. In "submitting jobs" mode,
# sets jobID to the ID of the last submitted job and (re)creates "kill_launchPEM.sh"
# which cancels the jobs of the cycle. Calls endlaunch as soon as i_myear reaches
# n_myear and errlaunch if a run or a submission fails.
submitPCM() {
    local i ii k_new pcm_job

    # Remove the numbered copies of the job script left by previous submissions
    find . -type f -name "PCMrun*.job" ! -name "PCMrun.job" -delete
    ii=1
    if [ -n "$3" ]; then
        ii=$3
    fi

    # First run: nothing to depend on
    if [ "$i_myear" -lt "$n_myear" ]; then
        echo "Run PCM $iPCM: call $ii/$2..."
        # NOTE(review): this value does not depend on arg3, unlike the one in the loop
        # below ("i + 2 - nPCM_ini") - confirm it is intended when starting from a run > 1
        k_new=$((3 - nPCM_ini))
        if [ "$1" -eq 0 ]; then # Mode: processing scripts
            # The pattern accepts a sign: the job script is modified in place and the
            # previous value of k can be negative
            sed -i "s/^k=-\?[0-9]\+$/k=${k_new}/" PCMrun.job
            if ! ./PCMrun.job; then
                errlaunch
            fi
        else # Mode: launching jobs
            pcm_job="PCMrun${iPCM}.job"
            cp PCMrun.job "$pcm_job"
            sed -i -E "s/($name_job[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPCM}/" "$pcm_job"
            sed -i "s/^k=-\?[0-9]\+$/k=${k_new}/" "$pcm_job"
            jobID=$(eval "$submit_job $pcm_job")
            if [ -z "$jobID" ]; then # Without ID, the next jobs cannot be chained
                echo "Error: the submission of \"$pcm_job\" did not return any job ID!"
                errlaunch
            fi
            # Create a file to cancel the dependent jobs of the cycle
            echo "#!/bin/bash" > kill_launchPEM.sh
            chmod +x kill_launchPEM.sh
            echo "$kill_job $jobID" >> kill_launchPEM.sh
        fi
        ((iPCM++))
        ((i_myear++))
        ((ii++))
    else
        endlaunch
    fi

    # Following runs: each one depends on the success of the previous one
    for ((i = ii; i <= $2; i++)); do
        if [ "$i_myear" -lt "$n_myear" ]; then
            echo "Run PCM $iPCM: call $i/$2..."
            k_new=$((i + 2 - nPCM_ini))
            if [ "$1" -eq 0 ]; then # Mode: processing scripts
                sed -i "s/^k=-\?[0-9]\+$/k=${k_new}/" PCMrun.job
                if ! ./PCMrun.job; then
                    errlaunch
                fi
            else # Mode: launching jobs
                pcm_job="PCMrun${iPCM}.job"
                cp PCMrun.job "$pcm_job"
                sed -i -E "s/($name_job[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPCM}/" "$pcm_job"
                sed -i "s/^k=-\?[0-9]\+$/k=${k_new}/" "$pcm_job"
                jobID=$(eval "$submit_dependjob=afterok:${jobID} $pcm_job")
                if [ -z "$jobID" ]; then
                    echo "Error: the submission of \"$pcm_job\" did not return any job ID!"
                    errlaunch
                fi
                echo "$kill_job $jobID" >> kill_launchPEM.sh
            fi
            ((iPCM++))
            ((i_myear++))
        else
            endlaunch
        fi
    done
}
217
# To submit the PEM run
# arg1: launching mode (0 = "processing scripts"; any other value = "submitting jobs")
# The PEM run is launched without dependency on a previous job. In "submitting jobs"
# mode, sets jobID and (re)creates "kill_launchPEM.sh" which cancels the job.
# Calls endlaunch if i_myear has reached n_myear and errlaunch if the run or the
# submission fails.
submitPEM() {
    if [ "$i_myear" -lt "$n_myear" ]; then
        echo "Run PEM $iPEM"
        if [ "$1" -eq 0 ]; then # Mode: processing scripts
            if ! ./PEMrun.job; then
                errlaunch
            fi
        else # Mode: launching jobs
            # Put the number of the PEM run at the end of the job name
            sed -i -E "s/($name_job[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPEM}/" PEMrun.job
            jobID=$(eval "$submit_job PEMrun.job")
            if [ -z "$jobID" ]; then # Without ID, the next jobs cannot be chained
                echo "Error: the submission of \"PEMrun.job\" did not return any job ID!"
                errlaunch
            fi
            # Create a file to cancel the dependent jobs of the cycle
            echo "#!/bin/bash" > kill_launchPEM.sh
            chmod +x kill_launchPEM.sh
            echo "$kill_job $jobID" >> kill_launchPEM.sh
        fi
    else
        endlaunch
    fi
}
240
# To make one cycle of PCM and PEM runs
# arg1: launching mode (0 = "processing scripts"; any other value = "submitting jobs")
# arg2: number of PCM runs to launch
# arg3: local number of the PCM run from which to start (optional)
# The PCM runs are launched through submitPCM, then the PEM run is launched. In
# "submitting jobs" mode, the PEM job depends on the success of the job whose ID is
# in jobID and is appended to "kill_launchPEM.sh".
# Calls endlaunch if i_myear has reached n_myear and errlaunch if the run or the
# submission fails.
cyclelaunch() {
    # PCM runs (arg3 is forwarded only when it is given)
    submitPCM "$1" "$2" ${3:+"$3"}

    # PEM run
    if [ "$i_myear" -lt "$n_myear" ]; then
        echo "Run PEM $iPEM"
        if [ "$1" -eq 0 ]; then # Mode: processing scripts
            if ! ./PEMrun.job; then
                errlaunch
            fi
        else # Mode: launching jobs
            # Put the number of the PEM run at the end of the job name
            sed -i -E "s/($name_job[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPEM}/" PEMrun.job
            jobID=$(eval "$submit_dependjob=afterok:${jobID} PEMrun.job")
            if [ -z "$jobID" ]; then # Without ID, the next jobs cannot be chained
                echo "Error: the submission of \"PEMrun.job\" did not return any job ID!"
                errlaunch
            fi
            echo "$kill_job $jobID" >> kill_launchPEM.sh
        fi
    else
        endlaunch
    fi
}
[3355]266
# To clean files after the starting run of the relaunch
# arg1: file name prefix to clean
# arg2: file name extension to clean (can be empty)
# arg3: file number from which to clean
# Removes every file named <prefix><number><extension> whose number is strictly
# greater than arg3. Names whose middle part is not a pure number are left untouched.
cleanfiles() {
    local prefix=$1
    local extension=$2
    local last_kept=$3
    local file num

    # Prefix and extension are quoted so that they are taken literally, even with
    # blanks or wildcard characters; only the middle part is a wildcard
    for file in "${prefix}"*"${extension}"; do
        num=${file#"$prefix"}
        num=${num%"$extension"}
        if [[ $num =~ ^[0-9]+$ ]] && [ "$num" -gt "$last_kept" ]; then
            rm -- "$file"
        fi
    done
}
291
# To relaunch from PCM run
# arg1: launching mode (0 = "processing scripts"; any other value = "submitting jobs")
# Reads the global variable irelaunch (number of the PCM run to relaunch from).
# - Sets iPCM to irelaunch + 1 and removes the PCM outputs numbered after irelaunch.
# - Restores the start files from the restart files number irelaunch.
# - Sets iPEM and i_myear, removes the PEM outputs numbered after iPEM and rewrites the
#   first line of "info_PEM.txt".
# - Launches what comes next: a cycle through cyclelaunch, or the PEM run through
#   submitPEM when irelaunch is the last PCM run of its cycle.
relaunchPCM() {
    iPCM=$((irelaunch + 1))
    cleanfiles diags/diagfi .nc "$irelaunch"
    cleanfiles diags/data2reshape .nc "$irelaunch"
    cleanfiles "out_PCM/run" "" "$irelaunch"
    cleanfiles starts/restart1D .txt "$irelaunch"
    cleanfiles starts/restart .nc "$irelaunch"
    cleanfiles starts/restartfi .nc "$irelaunch"
    cp "starts/restartfi${irelaunch}.nc" startfi.nc
    if [ -f "starts/restart${irelaunch}.nc" ]; then
        cp "starts/restart${irelaunch}.nc" start.nc
    elif [ -f "starts/restart1D${irelaunch}.txt" ]; then
        cp "starts/restart1D${irelaunch}.txt" start1D.txt
    fi
    if [ "$irelaunch" -le "$nPCM_ini" ]; then
        # PCM relaunch during the initialization cycle
        iPEM=1
        cleanfiles diags/diagpem .nc "$iPEM"
        cleanfiles "out_PEM/run" "" "$iPEM"
        cleanfiles starts/restart1D_postPEM .txt "$iPEM"
        cleanfiles starts/restart_postPEM .nc "$iPEM"
        cleanfiles starts/restartfi_postPEM .nc "$iPEM"
        cleanfiles starts/restartpem .nc "$iPEM"
        i_myear=$irelaunch
        sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt
        rm -f startpem.nc
        if [ "$irelaunch" -eq $((nPCM_ini - 1)) ]; then # Second to last PCM run
            cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y1.nc
            cyclelaunch "$1" "$nPCM_ini" "$iPCM"
        elif [ "$irelaunch" -eq "$nPCM_ini" ]; then # Last PCM run
            cp "diags/data2reshape$((irelaunch - 1)).nc" data2reshape_Y1.nc
            cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y2.nc
            submitPEM "$1" # The next job is a PEM run
        else
            cyclelaunch "$1" "$nPCM_ini" "$iPCM"
        fi
    else
        # PCM relaunch during a cycle
        # Integer divisions, as done by bc with its default scale
        iPEM=$(((iPCM - nPCM_ini) / nPCM + 1))
        il=$(((irelaunch - nPCM_ini + 1) % nPCM + 1))
        cleanfiles diags/diagpem .nc "$iPEM"
        cleanfiles "out_PEM/run" "" "$iPEM"
        cleanfiles starts/restart1D_postPEM .txt "$iPEM"
        cleanfiles starts/restart_postPEM .nc "$iPEM"
        cleanfiles starts/restartfi_postPEM .nc "$iPEM"
        cleanfiles starts/restartpem .nc "$iPEM"
        cp "starts/restartpem${iPEM}.nc" startpem.nc
        # The year counter is rebuilt from the first column of line number iPEM of "info_PEM.txt"
        if [ "$il" -eq $((nPCM - 1)) ]; then # Second to last PCM run
            i_myear=$(($(awk -v n="$iPEM" 'NR == n {print $1}' info_PEM.txt) + il))
            sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt
            cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y1.nc
            cyclelaunch "$1" "$nPCM" "$il"
        elif [ "$il" -eq "$nPCM" ]; then # Last PCM run so the next job is a PEM run
            i_myear=$(($(awk -v n="$iPEM" 'NR == n {print $1}' info_PEM.txt) + nPCM))
            sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt
            cp "diags/data2reshape$((irelaunch - 1)).nc" data2reshape_Y1.nc
            cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y2.nc
            submitPEM "$1"
        else
            i_myear=$(($(awk -v n="$iPEM" 'NR == n {print $1}' info_PEM.txt) + il))
            sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt
            cyclelaunch "$1" "$nPCM" "$il"
        fi
    fi
}
359
# To relaunch from PEM run
# arg1: launching mode (0 = "processing scripts"; any other value = "submitting jobs")
# Reads the global variable irelaunch (number of the PEM run to relaunch from).
# - Sets iPEM to irelaunch + 1 and iPCM to nPCM_ini + nPCM*(irelaunch - 1) + 1.
# - Reads i_myear in the first column of line number iPEM + 1 of "info_PEM.txt" and
#   rewrites the first line of this file.
# - Removes the PCM outputs numbered after iPCM - 1 and the PEM outputs numbered
#   after irelaunch.
# - Restores the start files from the outputs of the PEM run number irelaunch and
#   launches a cycle of nPCM PCM runs through cyclelaunch.
relaunchPEM() {
    local last_pcm

    iPEM=$((irelaunch + 1))
    iPCM=$((nPCM_ini + nPCM * (irelaunch - 1) + 1))
    i_myear=$(awk -v n="$((iPEM + 1))" 'NR == n {print $1}' info_PEM.txt)
    sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt

    # Outputs of the PCM runs
    last_pcm=$((iPCM - 1))
    cleanfiles diags/diagfi .nc "$last_pcm"
    cleanfiles "out_PCM/run" "" "$last_pcm"
    cleanfiles starts/restart1D .txt "$last_pcm"
    cleanfiles starts/restart .nc "$last_pcm"
    cleanfiles starts/restartfi .nc "$last_pcm"
    cleanfiles diags/data2reshape .nc "$last_pcm"

    # Outputs of the PEM runs
    cleanfiles diags/diagpem .nc "$irelaunch"
    cleanfiles "out_PEM/run" "" "$irelaunch"
    cleanfiles starts/restart1D_postPEM .txt "$irelaunch"
    cleanfiles starts/restart_postPEM .nc "$irelaunch"
    cleanfiles starts/restartfi_postPEM .nc "$irelaunch"
    cleanfiles starts/restartpem .nc "$irelaunch"

    # Start files
    cp "starts/restartpem${irelaunch}.nc" startpem.nc
    cp "starts/restartfi_postPEM${irelaunch}.nc" startfi.nc
    if [ -f "starts/restart_postPEM${irelaunch}.nc" ]; then
        cp "starts/restart_postPEM${irelaunch}.nc" start.nc
    elif [ -f "starts/restart1D_postPEM${irelaunch}.txt" ]; then
        cp "starts/restart1D_postPEM${irelaunch}.txt" start1D.txt
    fi
    cyclelaunch "$1" "$nPCM"
}
Note: See TracBrowser for help on using the repository browser.