source: trunk/LMDZ.COMMON/libf/evolution/deftank/lib_launchPEM.sh

Last change on this file was 3869, checked in by jbclement, 4 days ago

PEM:
Bug correction to detect the job time limit with PBS/TORQUE. Making it more robust and automatic for the launching script and the Fortran code.
JBC

  • Property svn:executable set to *
File size: 17.6 KB
Line 
1#!/bin/bash
2########################################################################
3######## Library of bash functions for the PEM launching script ########
4########################################################################
5
# Terminate the launching script on success.
# Globals: OLD_LC_NUMERIC (read), LC_NUMERIC (written)
# Outputs: the current date followed by a success message (stdout)
# Exits the shell with status 0.
endlaunch() {
    LC_NUMERIC=$OLD_LC_NUMERIC # Put back the numeric locale kept in OLD_LC_NUMERIC

    date
    printf '%s\n' "Success: the launching script for the PEM simulation completed normally!"
    exit 0
}
15
# Terminate the launching script after a failure.
# Globals: OLD_LC_NUMERIC (read), LC_NUMERIC (written)
# Outputs: the current date followed by an error message (stdout)
# Exits the shell with status 1.
errlaunch() {
    LC_NUMERIC=$OLD_LC_NUMERIC # Put back the numeric locale kept in OLD_LC_NUMERIC

    date
    printf '%s\n' "Error: an issue occured in the launching script for the PEM simulation!"
    exit 1
}
25
# Detect which job scheduler is available and record how to drive it.
# SLURM is recognized through "squeue", PBS/TORQUE through "qstat"; SLURM
# is probed first. When neither command is found, errlaunch is called.
# Globals: machine (read); scheduler, name_job, kill_job (written)
job_scheduler() {
    local detected=""

    if command -v squeue > /dev/null 2>&1; then
        detected="SLURM"
    elif command -v qstat > /dev/null 2>&1; then
        detected="PBS"
    fi

    case "$detected" in
        SLURM)
            echo "SLURM is installed on $machine."
            scheduler="SLURM"
            name_job="#SBATCH --job-name="
            kill_job="scancel"
            ;;
        PBS)
            echo "PBS/TORQUE is installed on $machine."
            scheduler="PBS"
            name_job="#PBS -N "
            kill_job="qdel"
            ;;
        *)
            echo "Error: neither SLURM nor TORQUE/PBS is installed on $machine!"
            echo "You need to adapt the script to your job scheduler or set 'mode' to 0."
            errlaunch
            ;;
    esac
}
44
# To get the number of slopes for the simulation
# The value is taken from the "nslope = N ;" entry of the "startfi.nc" header
# when that file exists. Otherwise it is taken from the first uncommented
# "nslope = N" line found in "run_PCM.def", then in "callphys.def".
# When nothing is found the number of slopes defaults to 1.
# Globals: ns (written)
get_nslope() {
    local def_file entry

    ns=""
    if [[ -f "startfi.nc" ]]; then
        # Keep the first match only so that 'ns' is always a single number
        ns=$(ncdump -h startfi.nc | sed -n 's/.*nslope = \([0-9]*\) ;.*/\1/p' | head -n 1)
    else
        for def_file in run_PCM.def callphys.def; do
            [[ -f "$def_file" ]] || continue
            # '|| [[ -n ... ]]' also processes a last line lacking a newline
            while IFS= read -r entry || [[ -n "$entry" ]]; do
                # The pattern is anchored on optional leading blanks, so lines
                # commented out with '#' can never match
                if [[ "$entry" =~ ^[[:space:]]*nslope[[:space:]]*=[[:space:]]*([0-9]+) ]]; then
                    ns="${BASH_REMATCH[1]}"
                    break 2 # Found: leave both the line loop and the file loop
                fi
            done < "$def_file"
        done
    fi

    # Nothing found anywhere: fall back to a single slope
    [[ -n "$ns" ]] || ns=1
}
71
# To modify the xml file according to nslope
# Every 'enabled=".true.">' of "file_def_physics_mars.xml" is first switched
# to ".false.". Then, inside the <file id="diurnalave"> block the "enabled"
# line is set to ".true." when ns equals 1, and inside the
# <file id="diurnalave_s"> block it is set to ".true." otherwise.
# Globals: ns (read)
modify_xml() {
    local xml="file_def_physics_mars.xml"
    local tmp="tmp_file_def.xml"
    local line
    local in_diurnalave=false
    local in_diurnalave_s=false
    local enabled_diurnalave enabled_diurnalave_s

    # The replacement lines only depend on 'ns': work them out once
    if [[ $ns -eq 1 ]]; then
        enabled_diurnalave='              enabled=".true.">'
        enabled_diurnalave_s='              enabled=".false.">'
    else
        enabled_diurnalave='              enabled=".false.">'
        enabled_diurnalave_s='              enabled=".true.">'
    fi

    sed -i 's/enabled="\.true\.\">/enabled=".false.">/g' "$xml"

    # The whole loop is redirected once with '>' so that a temporary file left
    # over by an interrupted run is overwritten instead of being appended to
    while IFS= read -r line || [[ -n "$line" ]]; do
        case "$line" in
            *'<file id="diurnalave"'*)
                in_diurnalave=true
                ;;
            *'<file id="diurnalave_s"'*)
                in_diurnalave_s=true
                ;;
        esac

        if [[ $line == *'enabled="'* ]]; then
            if [[ $in_diurnalave == true ]]; then
                line=$enabled_diurnalave
            elif [[ $in_diurnalave_s == true ]]; then
                line=$enabled_diurnalave_s
            fi
        fi

        # Closing tag: we are no longer inside any <file> block
        if [[ $line == *'</file>'* ]]; then
            in_diurnalave=false
            in_diurnalave_s=false
        fi

        printf '%s\n' "$line"
    done < "$xml" > "$tmp"

    mv "$tmp" "$xml"
}
117
# To check if everything necessary for the launching script is ok
# Validates the requested number of years and the numbers of PCM runs, checks
# that the required job/definition/XML files are present in the current
# directory, creates the "logs", "starts" and "diags" directories, detects the
# job scheduler when 'mode' is not 0 and adapts the XIOS output file to the
# number of slopes. Any failed check ends the script through errlaunch.
# Globals: n_mars_years, n_earth_years, nPCM_ini, nPCM, mode, dir (read);
#          OLD_LC_NUMERIC, LC_NUMERIC (written)
checklaunch() {
    local required
    local int_re='^[+-]?[0-9]+$'

    # Save the current value of LC_NUMERIC and set it to a locale that uses a dot as the decimal separator
    OLD_LC_NUMERIC=$LC_NUMERIC
    LC_NUMERIC=en_US.UTF-8

    if [ -v n_mars_years ] && [ -n "$n_mars_years" ]; then
        if [[ "$(echo "$n_mars_years <= 0." | bc -l)" == 1 ]]; then
            echo "Error: 'n_mars_years' must be > 0!"
            errlaunch
        fi
    elif [ -v n_earth_years ] && [ -n "$n_earth_years" ]; then
        if [[ "$(echo "$n_earth_years <= 0." | bc -l)" == 1 ]]; then
            echo "Error: 'n_earth_years' must be > 0!"
            errlaunch
        fi
    else
        echo "Error: the number of years to be simulated is not set!"
        errlaunch
    fi

    # The integer check comes first: an empty or non-numeric value would make
    # the numeric comparison itself fail and slip through the validation
    if [[ ! "$nPCM_ini" =~ $int_re ]] || [ "$nPCM_ini" -lt 2 ]; then
        echo "Error: 'nPCM_ini' must be >= 2!"
        errlaunch
    fi
    if [[ ! "$nPCM" =~ $int_re ]] || [ "$nPCM" -lt 2 ]; then
        echo "Error: 'nPCM' must be >= 2!"
        errlaunch
    fi

    for required in PCMrun.job PEMrun.job run_PCM.def run_PEM.def \
        context_lmdz_physics.xml field_def_physics_mars.xml \
        file_def_physics_mars.xml iodef.xml; do
        if [ ! -f "$required" ]; then
            echo "Error: file \"$required\" does not exist in $dir!"
            errlaunch
        fi
    done

    mkdir -p logs starts diags

    if [ "$mode" -ne 0 ]; then
        job_scheduler
    fi
    # Set automatically the XIOS output file for the PEM according to the number of slopes
    get_nslope
    modify_xml
}
194
# To convert Earth years into Mars years
# Computes the Martian-to-Earth year length ratio (rounded to 4 decimals) and
# the number of Martian years to simulate. 'n_mars_years' is used when it is
# set and non-empty, otherwise 'n_earth_years' is converted.
# Globals: n_mars_years, n_earth_years (read);
#          myear, eyear, convert_years, n_myear (written)
convertyears() {
    myear=686.9725      # Number of Earth days in Martian year
    eyear=365.256363004 # Number of days in Earth year
    convert_years=$(echo "$myear/$eyear" | bc -l)
    convert_years=$(printf "%.4f" "$convert_years") # Rounding to the 4th decimal to respect the precision of Martian year
    # A variable that is set but empty is treated as not provided, which is
    # the same rule as the one applied when the inputs are validated
    if [ -v n_mars_years ] && [ -n "$n_mars_years" ]; then
        n_myear=$n_mars_years
        echo "Number of years to be simulated: $n_myear Martian years."
    elif [ -v n_earth_years ] && [ -n "$n_earth_years" ]; then
        n_myear=$(echo "$n_earth_years/$convert_years" | bc -l)
        echo "Number of years to be simulated: $n_earth_years Earth years = $n_myear Martian years."
    fi
}
209
# Initialize a new chained simulation.
# Prints a banner, converts the requested duration with convertyears, resets
# the year and run counters, saves the initial start files found in the current
# directory into "starts/" and writes the first line of "info_PEM.txt".
# Globals: dir, machine, user, n_myear, convert_years, nPCM, nPCM_ini (read);
#          i_myear, iPEM, iPCM (written)
initlaunch() {
    echo "This is a chained simulation for PEM and PCM runs in $dir on $machine by $user."
    convertyears

    # Counters of the chained simulation
    i_myear=0.
    iPEM=1
    iPCM=1

    # Keep a copy of the initial states
    [ ! -f "startfi.nc" ] || cp startfi.nc starts/
    if [ -f "start.nc" ]; then
        cp start.nc starts/
    else
        # "start1D.txt" is only considered when "start.nc" is absent
        [ ! -f "start1D.txt" ] || cp start1D.txt starts/
    fi
    [ ! -f "startpem.nc" ] || cp startpem.nc starts/

    # Create a file to manage years of the chained simulation and store some info from the PEM runs
    echo $i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini > info_PEM.txt
}
232
# Private helper of submitPCM: run or submit one PCM run.
# arg1: launching mode
# arg2: counting method
# arg3: number of PCM runs of the series
# arg4: local number of this PCM run in the series
# arg5: 1 for the first run of the series (no job dependency, the kill script
#       is created), 0 for a run depending on the previously submitted job
# Globals: n_myear, scheduler, name_job, kill_job (read);
#          i_myear, iPCM, jobID (read and written)
_submitPCM_run() {
    local mode=$1 counting=$2 total=$3 idx=$4 is_first=$5
    local k job new_id=""
    local -a dep_opt=()

    # All requested years are done: end the launching script
    if [[ "$(echo "$i_myear < $n_myear" | bc -l)" != 1 ]]; then
        endlaunch
        return
    fi

    echo "Run \"PCM $iPCM\" ($idx/$total)"
    k=$(echo "$idx - $total + 2" | bc) # Value written on the "k=" line of the job script
    if [ "$mode" -eq 0 ]; then # Mode: processing scripts
        sed -i "s/^k=-\?[0-9]\+$/k=$k/" PCMrun.job
        if ! ./PCMrun.job; then
            errlaunch
        fi
    else # Mode: submitting jobs
        job="PCMrun${iPCM}.job"
        cp PCMrun.job "$job"
        # Put the global PCM run number at the end of the job name
        sed -i -E "/^$name_job/s/(.*[^0-9])([0-9]+)(_[^0-9]*)?$/\1${iPCM}\3/" "$job"
        sed -i "s/^k=-\?[0-9]\+$/k=$k/" "$job"
        if [[ "$scheduler" == "SLURM" ]]; then
            if ((!is_first)); then
                dep_opt=(--dependency="afterok:${jobID}")
            fi
            new_id=$(sbatch --parsable "${dep_opt[@]}" "$job")
        elif [[ "$scheduler" == "PBS" ]]; then
            if ((!is_first)); then
                dep_opt=(-W "depend=afterok:${jobID}")
            fi
            new_id=$(qsub "${dep_opt[@]}" "$job" | cut -d. -f1)
        fi
        # Without a job ID the following jobs would get a broken dependency
        # and the kill script a useless entry: stop right away
        if [ -z "$new_id" ]; then
            echo "Error: the submission of \"$job\" failed!"
            errlaunch
        fi
        jobID=$new_id
        if ((is_first)); then
            # Create a file to cancel the dependent jobs of the cycle
            echo "#!/bin/bash" > kill_launchPEM.sh
            chmod +x kill_launchPEM.sh
        fi
        echo "$kill_job $jobID" >> kill_launchPEM.sh
    fi
    ((iPCM++))
    if [ "$counting" -ne 0 ]; then # Counting: PCM runs taken into account
        i_myear=$(echo "$i_myear + 1." | bc -l)
    fi
}

# To submit the PCM runs
# arg1: launching mode (0: the job scripts are executed directly, otherwise they are submitted to the job scheduler)
# arg2: counting method (not 0: each PCM run adds one year to 'i_myear')
# arg3: number of PCM runs to launch
# arg4: local number of the PCM run from which to start (optional, 1 by default)
# Globals: ii, i (written), plus those of the helper above
submitPCM() {
    # Remove the numbered job scripts of a previous series
    find . -type f -name "PCMrun*.job" ! -name "PCMrun.job" -delete
    ii=${4:-1}
    # First run of the series: no dependency
    _submitPCM_run "$1" "$2" "$3" "$ii" 1
    ((ii++))
    # Remaining runs: each one waits for the success of the previous job
    for ((i = ii; i <= $3; i++)); do
        _submitPCM_run "$1" "$2" "$3" "$i" 0
    done
}
303
# To submit the PEM run
# The job is submitted without any dependency and the kill script is created.
# arg1: launching mode (0: "PEMrun.job" is executed directly, otherwise it is submitted to the job scheduler)
# Globals: i_myear, n_myear, iPEM, scheduler, name_job, kill_job (read);
#          jobID (written)
submitPEM() {
    local new_id=""

    # All requested years are done: end the launching script
    if [[ "$(echo "$i_myear < $n_myear" | bc -l)" != 1 ]]; then
        endlaunch
        return
    fi

    echo "Run \"PEM $iPEM\""
    if [ "$1" -eq 0 ]; then # Mode: processing scripts
        if ! ./PEMrun.job; then
            errlaunch
        fi
    else # Mode: submitting jobs
        # Put the PEM run number at the end of the job name
        sed -i -E "/^$name_job/s/(.*[^0-9])([0-9]+)(_[^0-9]*)?$/\1${iPEM}\3/" PEMrun.job
        if [[ "$scheduler" == "SLURM" ]]; then
            new_id=$(sbatch --parsable PEMrun.job)
        elif [[ "$scheduler" == "PBS" ]]; then
            new_id=$(qsub PEMrun.job | cut -d. -f1)
        fi
        # Without a job ID the kill script would be useless: stop right away
        if [ -z "$new_id" ]; then
            echo "Error: the submission of \"PEMrun.job\" failed!"
            errlaunch
        fi
        jobID=$new_id
        # Create a file to cancel the dependent jobs of the cycle
        echo "#!/bin/bash" > kill_launchPEM.sh
        chmod +x kill_launchPEM.sh
        echo "$kill_job $jobID" >> kill_launchPEM.sh
    fi
}
330
# To make one cycle of PCM and PEM runs
# The PCM runs are launched with submitPCM, then the PEM run is executed or
# submitted with a dependency on the last job ID stored in 'jobID'.
# arg1: launching mode (0: the job scripts are executed directly, otherwise they are submitted to the job scheduler)
# arg2: counting method
# arg3: number of PCM runs to launch
# arg4: local number of the PCM run from which to start (optional)
# Globals: i_myear, n_myear, iPEM, scheduler, name_job, kill_job (read);
#          jobID (read and written)
cyclelaunch() {
    local new_id=""

    # PCM runs (the optional 4th argument is forwarded only when provided)
    submitPCM "$1" "$2" "$3" ${4:+"$4"}

    # PEM run
    # All requested years are done: end the launching script
    if [[ "$(echo "$i_myear < $n_myear" | bc -l)" != 1 ]]; then
        endlaunch
        return
    fi

    echo "Run \"PEM $iPEM\""
    if [ "$1" -eq 0 ]; then # Mode: processing scripts
        if ! ./PEMrun.job; then
            errlaunch
        fi
    else # Mode: submitting jobs
        # Put the PEM run number at the end of the job name
        sed -i -E "/^$name_job/s/(.*[^0-9])([0-9]+)(_[^0-9]*)?$/\1${iPEM}\3/" PEMrun.job
        if [[ "$scheduler" == "SLURM" ]]; then
            new_id=$(sbatch --parsable --dependency="afterok:${jobID}" PEMrun.job)
        elif [[ "$scheduler" == "PBS" ]]; then
            new_id=$(qsub -W "depend=afterok:${jobID}" PEMrun.job | cut -d. -f1)
        fi
        # Without a job ID the kill script would get a useless entry: stop right away
        if [ -z "$new_id" ]; then
            echo "Error: the submission of \"PEMrun.job\" failed!"
            errlaunch
        fi
        jobID=$new_id
        echo "$kill_job $jobID" >> kill_launchPEM.sh
    fi
}
361
# To clean files after the starting run of the relaunch
# Removes every file named <prefix><number><extension> whose number is
# strictly greater than the given one. Files whose middle part is not a plain
# number are left untouched.
# arg1: file name prefix to clean (may contain a directory part)
# arg2: file name extension to clean (may be empty)
# arg3: file number from which to clean
cleanfiles() {
    local prefix=$1
    local extension=$2
    local threshold=$3
    local file num

    # Prefix and extension are quoted so that paths containing blanks work;
    # only the '*' in between is a glob. An empty extension simply matches
    # everything starting with the prefix.
    for file in "$prefix"*"$extension"; do
        num=${file#"$prefix"}
        num=${num%"$extension"}
        # An unmatched glob stays literal ("...*...") and fails the number test
        if [[ $num =~ ^[0-9]+$ ]] && [ "$num" -gt "$threshold" ]; then
            rm -- "$file"
        fi
    done
}
386
# Relaunch the chained simulation right after PCM run number 'irelaunch'.
# Outputs of later runs are removed with cleanfiles, the start files are
# restored from "starts/", the first line of "info_PEM.txt" is rewritten and
# the chain is resumed with cyclelaunch or submitPEM.
# arg1: launching mode
# arg2: counting method
# Globals: irelaunch, nPCM, nPCM_ini, n_myear, convert_years (read);
#          iPCM, iPEM, il, i_myear (written)
relaunchPCM() {
    local j init_cycle last_pem pem_year
    # (prefix, extension) pairs of the files produced by the PCM runs
    local -a pcm_outputs=(
        diags/diagfi .nc
        diags/diagsoil .nc
        diags/data2reshape .nc
        logs/runPCM .log
        starts/restart1D .txt
        starts/restart .nc
        starts/restartfi .nc
    )
    # (prefix, extension) pairs of the files produced by the PEM runs
    local -a pem_outputs=(
        diags/diagpem .nc
        diags/diagsoilpem .nc
        logs/runPEM .log
        starts/restart1D_postPEM .txt
        starts/restart_postPEM .nc
        starts/restartfi_postPEM .nc
        starts/restartpem .nc
    )

    iPCM=$(($irelaunch + 1))

    # Drop the PCM outputs numbered after the relaunch point
    for ((j = 0; j < ${#pcm_outputs[@]}; j += 2)); do
        cleanfiles "${pcm_outputs[j]}" "${pcm_outputs[j + 1]}" $irelaunch
    done

    # Restore the start files written by the PCM run 'irelaunch'
    cp "starts/restartfi${irelaunch}.nc" startfi.nc
    if [ -f "starts/restart${irelaunch}.nc" ]; then
        cp "starts/restart${irelaunch}.nc" start.nc
    elif [ -f "starts/restart1D${irelaunch}.txt" ]; then
        cp "starts/restart1D${irelaunch}.txt" start1D.txt
    fi

    # Work out where we are in the chain
    if [ $irelaunch -le $nPCM_ini ]; then
        # PCM relaunch during the initialization cycle
        init_cycle=true
        iPEM=1
        if [ $2 -ne 0 ]; then # Counting: PCM runs taken into account
            i_myear=$irelaunch
        else # Counting: only PEM runs count
            i_myear=0
        fi
    else
        # PCM relaunch during a cycle
        # NOTE(review): these two formulas look consistent with a local run
        # index only when nPCM is 2 - to be confirmed for other values
        init_cycle=false
        iPEM=$(bc <<< "($iPCM - $nPCM_ini)/$nPCM + 1")
        il=$(bc <<< "($irelaunch - $nPCM_ini + 1)%$nPCM + 1")
        # Third field of line number iPEM of "info_PEM.txt"
        pem_year=$(awk -v row="$iPEM" 'NR == row {printf "%s\n", $3}' info_PEM.txt)
        if [ $2 -ne 0 ]; then # Counting: PCM runs taken into account
            i_myear=$(bc -l <<< "$pem_year + $il")
        else # Counting: only PEM runs count
            i_myear=$pem_year
        fi
    fi

    sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt

    # Drop the PEM outputs numbered from iPEM onwards
    last_pem=$(($iPEM - 1))
    for ((j = 0; j < ${#pem_outputs[@]}; j += 2)); do
        cleanfiles "${pem_outputs[j]}" "${pem_outputs[j + 1]}" $last_pem
    done

    if [[ $init_cycle == true ]]; then
        # Go back to the initial PEM start file if there was one
        rm -f startpem.nc
        if [ -f "starts/startpem.nc" ]; then
            cp starts/startpem.nc .
        fi
        if [ $irelaunch -eq $nPCM_ini ]; then
            cp "diags/data2reshape$(($irelaunch - 1)).nc" data2reshape_Y1.nc
            cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y2.nc
            submitPEM $1 # The next job is a PEM run
        else
            if [ $irelaunch -eq $(($nPCM_ini - 1)) ]; then
                cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y1.nc
            fi
            cyclelaunch $1 $2 $nPCM_ini $iPCM
        fi
    else
        cp "starts/restartpem${last_pem}.nc" startpem.nc
        if [ $il -eq $nPCM ]; then # Last PCM run so the next job is a PEM run
            cp "diags/data2reshape$(($irelaunch - 1)).nc" data2reshape_Y1.nc
            cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y2.nc
            submitPEM $1
        else
            if [ $il -eq $(($nPCM - 1)) ]; then # Second to last PCM run
                cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y1.nc
            fi
            cyclelaunch $1 $2 $nPCM $(($il + 1))
        fi
    fi
}
465
# Relaunch the chained simulation right after PEM run number 'irelaunch'.
# Outputs of later runs are removed with cleanfiles, the start files written
# by that PEM run are restored from "starts/", the first line of
# "info_PEM.txt" is rewritten and a new cycle is started with cyclelaunch.
# arg1: launching mode
# arg2: counting method
# Globals: irelaunch, nPCM, nPCM_ini, n_myear, convert_years (read);
#          iPEM, iPCM, i_myear (written)
relaunchPEM() {
    local j last_pcm
    # (prefix, extension) pairs of the files produced by the PCM runs
    local -a pcm_outputs=(
        diags/diagfi .nc
        diags/diagsoil .nc
        logs/runPCM .log
        starts/restart1D .txt
        starts/restart .nc
        starts/restartfi .nc
        diags/data2reshape .nc
    )
    # (prefix, extension) pairs of the files produced by the PEM runs
    local -a pem_outputs=(
        diags/diagpem .nc
        diags/diagsoilpem .nc
        logs/runPEM .log
        starts/restart1D_postPEM .txt
        starts/restart_postPEM .nc
        starts/restartfi_postPEM .nc
        starts/restartpem .nc
    )

    # Numbers of the next PEM and PCM runs
    iPEM=$(bc <<< "$irelaunch + 1")
    iPCM=$(bc <<< "$nPCM_ini + $nPCM*($irelaunch - 1) + 1")
    # Third field of line number iPEM of "info_PEM.txt"
    i_myear=$(awk -v row="$iPEM" 'NR == row {printf "%s\n", $3}' info_PEM.txt)
    sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt

    # Drop the PCM outputs numbered from iPCM onwards
    last_pcm=$(($iPCM - 1))
    for ((j = 0; j < ${#pcm_outputs[@]}; j += 2)); do
        cleanfiles "${pcm_outputs[j]}" "${pcm_outputs[j + 1]}" $last_pcm
    done
    # Drop the PEM outputs numbered after the relaunch point
    for ((j = 0; j < ${#pem_outputs[@]}; j += 2)); do
        cleanfiles "${pem_outputs[j]}" "${pem_outputs[j + 1]}" $irelaunch
    done

    # Restore the start files written by the PEM run 'irelaunch'
    cp "starts/restartpem${irelaunch}.nc" startpem.nc
    cp "starts/restartfi_postPEM${irelaunch}.nc" startfi.nc
    if [ -f "starts/restart_postPEM${irelaunch}.nc" ]; then
        cp "starts/restart_postPEM${irelaunch}.nc" start.nc
    elif [ -f "starts/restart1D_postPEM${irelaunch}.txt" ]; then
        cp "starts/restart1D_postPEM${irelaunch}.txt" start1D.txt
    fi

    cyclelaunch $1 $2 $nPCM
}
Note: See TracBrowser for help on using the repository browser.