source: trunk/LMDZ.COMMON/libf/evolution/deftank/lib_launchPEM.sh @ 3388

Last change on this file since 3388 was 3387, checked in by jbclement, 19 months ago

PEM:

  • Correction of the way the PEM stops before it reaches the SLURM time limit.
  • Small correction for 1D related to r3386.
  • Improvement of launching script.

JBC

  • Property svn:executable set to *
File size: 12.0 KB
Line 
#!/bin/bash
########################################################################
######## Library of bash functions for the PEM launching script ########
########################################################################

# To end the launching script
# Globals: OLD_LC_NUMERIC (read), LC_NUMERIC (written)
# Outputs: the current date and a success message on stdout
# Exits:   always, with status 0
endlaunch() {
    # Restore the previous value of LC_NUMERIC, but only if one has been saved:
    # when OLD_LC_NUMERIC was never set, the current locale is left untouched
    if [ -n "${OLD_LC_NUMERIC+set}" ]; then
        LC_NUMERIC=$OLD_LC_NUMERIC
    fi

    date
    echo "Successful end of the launching script for the PEM simulation."
    exit 0
}

# To end the launching script with error
# Globals: OLD_LC_NUMERIC (read), LC_NUMERIC (written)
# Outputs: the current date and an error message on stdout
# Exits:   always, with status 1
errlaunch() {
    # Restore the previous value of LC_NUMERIC, but only if one has been saved:
    # when OLD_LC_NUMERIC was never set, the current locale is left untouched
    if [ -n "${OLD_LC_NUMERIC+set}" ]; then
        LC_NUMERIC=$OLD_LC_NUMERIC
    fi

    date
    echo "End with error of the launching script for the PEM."
    exit 1
}

# To check if SLURM is the job scheduler
# Globals: machine (read, only used in the error message)
# Calls 'errlaunch' when the 'squeue' command cannot be found.
is_slurm() {
    # The exit status of 'command -v' is tested directly. Testing its output
    # with an unquoted '[ ! -x $(...) ]' degenerates into '[ ! -x ]' when the
    # command is missing, which is false, so the error was never reported.
    if ! command -v squeue > /dev/null 2>&1; then
        echo "Error: the job scheduler is not SLURM on $machine!"
        echo "You need to adapt the script to your case."
        errlaunch
    fi
}

# To check if everything necessary for the launching script is ok
# Globals: n_mars_years, n_earth_years, nPCM_ini, nPCM, dir (read)
#          OLD_LC_NUMERIC, LC_NUMERIC (written)
# Calls 'errlaunch' as soon as one check fails. Creates the directories
# "out_PCM", "out_PEM", "starts" and "diags" if they do not exist.
checklaunch() {
    local required_file

    # Save the current value of LC_NUMERIC and set it to a locale that uses a dot as the decimal separator
    # ("C" always exists, whereas a named locale such as en_US.UTF-8 may not be installed)
    OLD_LC_NUMERIC=$LC_NUMERIC
    LC_NUMERIC=C

    is_slurm

    # Number of years to be simulated: Martian years have priority over Earth years
    if [ -n "${n_mars_years-}" ]; then
        if [ "$n_mars_years" -lt 1 ]; then
            echo "Error: the value of 'n_mars_years' must be >0!"
            errlaunch
        fi
    elif [ -n "${n_earth_years-}" ]; then
        if [ "$n_earth_years" -lt 1 ]; then
            echo "Error: the value of 'n_earth_years' must be >0!"
            errlaunch
        fi
    else
        echo "Error: no number of years to be simulated has been set!"
        errlaunch
    fi

    # The emptiness test comes first so that '-lt' never sees an empty operand
    if [ -z "$nPCM_ini" ] || [ "$nPCM_ini" -lt 2 ]; then
        echo "Error: the value of 'nPCM_ini' must be >1!"
        errlaunch
    fi
    if [ -z "$nPCM" ] || [ "$nPCM" -lt 2 ]; then
        echo "Error: the value of 'nPCM' must be >1!"
        errlaunch
    fi

    # Files which must be present in the current directory
    for required_file in jobPCM.slurm jobPEM.slurm run_PCM.def run_PEM.def \
        context_lmdz_physics.xml field_def_physics_mars.xml \
        file_def_physics_mars.xml iodef.xml; do
        if [ ! -f "$required_file" ]; then
            echo "Error: file \"$required_file\" does not exist in $dir!"
            errlaunch
        fi
    done

    # Directories filled during the chained simulation
    mkdir -p out_PCM out_PEM starts diags
}

# To convert Earth years into Mars years
# Globals: n_mars_years, n_earth_years (read)
#          myear, eyear, convert_years, n_myear (written)
# Outputs: the number of years to be simulated on stdout
# A non-empty 'n_mars_years' has priority over 'n_earth_years'.
convertyears() {
    myear=686.9725      # Number of Earth days in Martian year
    eyear=365.256363004 # Number of days in Earth year
    convert_years=$(echo "$myear/$eyear" | bc -l)
    convert_years=$(printf "%.4f" "$convert_years") # Rounding to the 4th decimal to respect the precision of Martian year
    if [ -n "${n_mars_years-}" ]; then
        n_myear=$n_mars_years
        echo "Number of years to be simulated: $n_myear Martian years."
    elif [ -n "${n_earth_years-}" ]; then
        # Ceiling of n_earth_years/convert_years.
        # The quotient must be computed with decimals (scale = 10) BEFORE being
        # truncated: with bc's default scale of 0 the division itself truncates
        # and the result is the floor instead of the ceiling.
        n_myear=$(printf '%s\n' \
            "scale = 10" \
            "q = $n_earth_years/$convert_years" \
            "scale = 0" \
            "t = q/1" \
            "if (t < q) t = t + 1" \
            "t" | bc)
        echo "Number of years to be simulated: $n_earth_years Earth years = $n_myear Martian years."
    fi
}

# To initialize the launching script
# Globals: dir, machine, user, nPCM, nPCM_ini (read)
#          i_myear, iPEM, iPCM (written), plus those written by 'convertyears'
# Copies the initial start files into "starts/" and creates "info_PEM.txt".
initlaunch() {
    echo "This is a chained simulation for PEM and PCM runs in $dir on $machine by $user."
    convertyears
    i_myear=0
    iPEM=1
    iPCM=1
    cp startfi.nc starts/
    if [ -f "start.nc" ]; then
        cp start.nc starts/
    elif [ -f "start1D.txt" ]; then
        # 1D case: the start file is "start1D.txt", the name used by the
        # relaunch functions of this library when they restore it
        cp start1D.txt starts/
    fi

    # Create a file to manage years of the chained simulation and store some info from the PEM runs
    echo "$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini" > info_PEM.txt
}

# (private helper) To prepare and submit the job of the PCM run number $iPCM
# arg1: value given to 'k' in the job script
# arg2: ID of the job that must end successfully before this one starts (optional)
# Globals: iPCM (read), jobID (written)
# Calls 'errlaunch' if the submission fails or returns no job ID.
_submitPCM_job() {
    local job_file="jobPCM${iPCM}.slurm"
    local -a dependency=()

    if [ -n "${2-}" ]; then
        dependency=(--dependency="afterok:$2")
    fi
    cp jobPCM.slurm "$job_file"
    sed -i -E "s/(#SBATCH --job-name=[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPCM}/" "$job_file"
    sed -i "s/^k=[0-9]\+$/k=$1/" "$job_file"
    # Without a valid job ID the following jobs could not be chained nor cancelled
    if ! jobID=$(sbatch --parsable "${dependency[@]}" "$job_file") || [ -z "$jobID" ]; then
        echo "Error: the submission of \"$job_file\" failed!"
        errlaunch
    fi
}

# To submit the PCM runs
# arg1: number of PCM runs to launch
# arg2: local number of the PCM run from which to start (optional)
# Globals: i_myear, n_myear, nPCM_ini (read), iPCM, i_myear, jobID (written)
# Every submitted job depends on the previous one. The IDs of the jobs are
# stored in "kill_launchPEM.sh", which is recreated at each call.
# Calls 'endlaunch' as soon as the number of years to simulate is reached.
submitPCM() {
    local ii=1 i

    find . -type f -name "jobPCM*.slurm" ! -name "jobPCM.slurm" -delete
    if [ -n "${2-}" ]; then
        ii=$2
    fi
    if [ "$i_myear" -lt "$n_myear" ]; then
        echo "Run PCM $iPCM: call $ii/$1..."
        # NOTE(review): 'k' of this first job is "3 - nPCM_ini" whatever the
        # value of arg2, whereas the next jobs use "i + 2 - nPCM_ini". Both
        # agree only when the start number is 1 - confirm this is intended.
        _submitPCM_job "$((3 - nPCM_ini))"
        # Create a file to cancel the dependent jobs of the cycle
        echo '#!/bin/bash' > kill_launchPEM.sh
        chmod +x kill_launchPEM.sh
        echo "scancel $jobID" >> kill_launchPEM.sh
        ((iPCM++))
        ((i_myear++))
        ((ii++))
    else
        endlaunch
    fi
    for ((i = ii; i <= $1; i++)); do
        if [ "$i_myear" -lt "$n_myear" ]; then
            echo "Run PCM $iPCM: call $i/$1..."
            _submitPCM_job "$((i + 2 - nPCM_ini))" "$jobID"
            echo "scancel $jobID" >> kill_launchPEM.sh
            ((iPCM++))
            ((i_myear++))
        else
            endlaunch
        fi
    done
}

# To submit the PEM run
# Globals: i_myear, n_myear, iPEM (read), jobID (written)
# The job is submitted without dependency and its ID is stored in
# "kill_launchPEM.sh", which is recreated.
# Calls 'endlaunch' if the number of years to simulate is reached and
# 'errlaunch' if the submission fails.
submitPEM() {
    if [ "$i_myear" -lt "$n_myear" ]; then
        echo "Run PEM $iPEM"
        sed -i -E "s/(#SBATCH --job-name=[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPEM}/" jobPEM.slurm
        # Without a valid job ID the cancelling script would be useless
        if ! jobID=$(sbatch --parsable jobPEM.slurm) || [ -z "$jobID" ]; then
            echo "Error: the submission of \"jobPEM.slurm\" failed!"
            errlaunch
        fi
        # Create a file to cancel the dependent jobs of the cycle
        echo '#!/bin/bash' > kill_launchPEM.sh
        chmod +x kill_launchPEM.sh
        echo "scancel $jobID" >> kill_launchPEM.sh
    else
        endlaunch
    fi
}

# To make one cycle of PCM and PEM runs
# arg1: number of PCM runs to launch
# arg2: local number of the PCM run from which to start (optional)
# Globals: i_myear, n_myear, iPEM (read), jobID (read and written)
# The PEM job depends on the last job left in 'jobID' by 'submitPCM' and its
# ID is appended to "kill_launchPEM.sh".
# Calls 'endlaunch' if the number of years to simulate is reached and
# 'errlaunch' if the submission of the PEM job fails.
cyclelaunch() {
    # PCM runs (arg2 is forwarded only when it is given and not empty)
    submitPCM "$1" ${2:+"$2"}

    # PEM run
    if [ "$i_myear" -lt "$n_myear" ]; then
        echo "Run PEM $iPEM"
        sed -i -E "s/(#SBATCH --job-name=[^0-9]*[0-9]*[^0-9]*)[0-9]+$/\1${iPEM}/" jobPEM.slurm
        if ! jobID=$(sbatch --parsable --dependency="afterok:${jobID}" jobPEM.slurm) || [ -z "$jobID" ]; then
            echo "Error: the submission of \"jobPEM.slurm\" failed!"
            errlaunch
        fi
        echo "scancel $jobID" >> kill_launchPEM.sh
    else
        endlaunch
    fi
}

# To clean files after the starting run of the relaunch
# arg1: file name prefix to clean
# arg2: file name extension to clean (can be empty)
# arg3: file number from which to clean
# Removes every file named <prefix><number><extension> whose number is
# strictly greater than arg3. Files whose middle part is not made of digits
# only are left untouched.
cleanfiles() {
    local prefix=$1
    local extension=$2
    local threshold=$3
    local file num

    # An empty extension makes the pattern "<prefix>*", so one loop covers both
    # cases. Quoting keeps paths with spaces or glob characters intact.
    for file in "$prefix"*"$extension"; do
        num=${file#"$prefix"}
        num=${num%"$extension"}
        # When nothing matches, the pattern itself is iterated: its middle part
        # is "*", which the digit test rejects.
        # '[ -gt ]' reads numbers in base 10, even with leading zeros.
        if [[ $num =~ ^[0-9]+$ ]] && [ "$num" -gt "$threshold" ]; then
            rm -- "$file"
        fi
    done
}

# To relaunch from PCM run
# Globals: irelaunch, nPCM_ini, nPCM, n_myear, convert_years (read)
#          iPCM, iPEM, i_myear, il (written)
# Removes the outputs numbered after the run 'irelaunch', restores the start
# files of that run, updates the first line of "info_PEM.txt" and submits the
# next jobs through 'cyclelaunch' or 'submitPEM'.
relaunchPCM() {
    iPCM=$((irelaunch + 1))
    cleanfiles diags/diagfi .nc "$irelaunch"
    cleanfiles diags/data2reshape .nc "$irelaunch"
    cleanfiles "out_PCM/run" "" "$irelaunch"
    cleanfiles starts/restart1D .txt "$irelaunch"
    cleanfiles starts/restart .nc "$irelaunch"
    cleanfiles starts/restartfi .nc "$irelaunch"
    cp "starts/restartfi${irelaunch}.nc" startfi.nc
    if [ -f "starts/restart${irelaunch}.nc" ]; then
        cp "starts/restart${irelaunch}.nc" start.nc
    elif [ -f "starts/restart1D${irelaunch}.txt" ]; then
        cp "starts/restart1D${irelaunch}.txt" start1D.txt
    fi
    if [ "$irelaunch" -le "$nPCM_ini" ]; then
        # PCM relaunch during the initialization cycle
        iPEM=1
        cleanfiles diags/diagpem .nc "$iPEM"
        cleanfiles "out_PEM/run" "" "$iPEM"
        cleanfiles starts/restart1D_postPEM .txt "$iPEM"
        cleanfiles starts/restart_postPEM .nc "$iPEM"
        cleanfiles starts/restartfi_postPEM .nc "$iPEM"
        cleanfiles starts/restartpem .nc "$iPEM"
        i_myear=$irelaunch
        sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt
        rm -f startpem.nc
        if [ "$irelaunch" -eq "$((nPCM_ini - 1))" ]; then
            cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y1.nc
            cyclelaunch "$nPCM_ini" "$irelaunch"
        elif [ "$irelaunch" -eq "$nPCM_ini" ]; then
            cp "diags/data2reshape$((irelaunch - 1)).nc" data2reshape_Y1.nc
            cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y2.nc
            submitPEM # The next job is a PEM run
        else
            cyclelaunch "$nPCM_ini" "$iPCM"
        fi
    else
        # PCM relaunch during a cycle
        iPEM=$(((irelaunch - nPCM_ini)/nPCM + 1))
        # Integer remainder computed by the shell: with 'bc -l' (scale = 20)
        # the '%' operator is not the integer remainder (it gives 0 whenever
        # nPCM = 2 and values such as .00000000000000000001 otherwise), which
        # sent the relaunch into the wrong branch below.
        il=$(((irelaunch - nPCM_ini) % nPCM))
        cleanfiles diags/diagpem .nc "$iPEM"
        cleanfiles "out_PEM/run" "" "$iPEM"
        cleanfiles starts/restart1D_postPEM .txt "$iPEM"
        cleanfiles starts/restart_postPEM .nc "$iPEM"
        cleanfiles starts/restartfi_postPEM .nc "$iPEM"
        cleanfiles starts/restartpem .nc "$iPEM"
        cp "starts/restartpem${iPEM}.nc" startpem.nc
        # The year is rebuilt from the 1st column of line number iPEM of "info_PEM.txt"
        if [ "$il" -eq "$((nPCM - 1))" ]; then
            i_myear=$(($(awk -v row="$iPEM" 'NR == row {print $1}' info_PEM.txt) + il))
            sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt
            cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y1.nc
            cyclelaunch "$nPCM" "$il"
        elif [ "$il" -eq 0 ]; then
            i_myear=$(($(awk -v row="$iPEM" 'NR == row {print $1}' info_PEM.txt) + nPCM))
            sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt
            cp "diags/data2reshape$((irelaunch - 1)).nc" data2reshape_Y1.nc
            cp "diags/data2reshape${irelaunch}.nc" data2reshape_Y2.nc
            submitPEM # The next job is a PEM run
        else
            i_myear=$(($(awk -v row="$iPEM" 'NR == row {print $1}' info_PEM.txt) + il))
            sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt
            cyclelaunch "$nPCM" "$il"
        fi
    fi
}

# To relaunch from PEM run
# Globals: irelaunch, nPCM_ini, nPCM, n_myear, convert_years (read)
#          iPEM, iPCM, i_myear (written)
# Removes the outputs numbered after the PEM run 'irelaunch', restores the
# start files written after that run, updates the first line of
# "info_PEM.txt" and launches a cycle of 'nPCM' PCM runs.
relaunchPEM() {
    local last_PCM

    iPEM=$irelaunch
    # NOTE(review): the number of PCM runs before this point is taken as
    # nPCM_ini + (nPCM - 1)*irelaunch - confirm this matches the run numbering.
    iPCM=$((nPCM_ini + (nPCM - 1)*irelaunch + 1))
    # The year is read in the 1st column of line number iPEM + 1 of "info_PEM.txt"
    i_myear=$(awk -v row="$((iPEM + 1))" 'NR == row {print $1}' info_PEM.txt)
    sed -i "1s/.*/$i_myear $n_myear $convert_years $iPCM $iPEM $nPCM $nPCM_ini/" info_PEM.txt

    # PCM outputs: everything numbered after the last PCM run is removed
    last_PCM=$((iPCM - 1))
    cleanfiles diags/diagfi .nc "$last_PCM"
    cleanfiles "out_PCM/run" "" "$last_PCM"
    cleanfiles starts/restart1D .txt "$last_PCM"
    cleanfiles starts/restart .nc "$last_PCM"
    cleanfiles starts/restartfi .nc "$last_PCM"
    cleanfiles diags/data2reshape .nc "$last_PCM"

    # PEM outputs: everything numbered after the relaunched PEM run is removed
    cleanfiles diags/diagpem .nc "$irelaunch"
    cleanfiles "out_PEM/run" "" "$irelaunch"
    cleanfiles starts/restart1D_postPEM .txt "$irelaunch"
    cleanfiles starts/restart_postPEM .nc "$irelaunch"
    cleanfiles starts/restartfi_postPEM .nc "$irelaunch"
    cleanfiles starts/restartpem .nc "$irelaunch"

    # Start files of the next PCM run
    cp "starts/restartpem${irelaunch}.nc" startpem.nc
    cp "starts/restartfi_postPEM${irelaunch}.nc" startfi.nc
    if [ -f "starts/restart_postPEM${irelaunch}.nc" ]; then
        cp "starts/restart_postPEM${irelaunch}.nc" start.nc
    elif [ -f "starts/restart1D_postPEM${irelaunch}.txt" ]; then
        cp "starts/restart1D_postPEM${irelaunch}.txt" start1D.txt
    fi
    cyclelaunch "$nPCM"
}
Note: See TracBrowser for help on using the repository browser.