#!/bin/bash ######################################################################## ######## Library of bash functions for the PEM workflow script ######### ######################################################################## # To end the workflow script end_workflow() { # Restore the previous value of LC_NUMERIC LC_NUMERIC=$OLD_LC_NUMERIC date echo "Success: the PEM workflow script completed normally!" exit 0 } # To abort the workflow script with error abort_workflow() { # Restore the previous value of LC_NUMERIC LC_NUMERIC=$OLD_LC_NUMERIC date echo "Error: an issue occured in the PEM workflow script!" exit 1 } # To check if the command exists # arg1: required command require_cmd() { if ! command -v "$1" >/dev/null 2>&1; then echo "Error: required command '$1' not found." echo "Please install or load the corresponding library." abort_workflow fi } # To check what is the job scheduler function detect_scheduler() { if command -v squeue &> /dev/null; then echo "SLURM is installed on $machine." job_scheduler="SLURM" job_name="#SBATCH --job-name=" kill_job="scancel" elif command -v qstat &> /dev/null; then echo "PBS/TORQUE is installed on $machine." job_scheduler="PBS" job_name="#PBS -N " kill_job="qdel" else echo "Error: neither SLURM nor TORQUE/PBS is installed on $machine!" echo "Please adapt the script to your job scheduler or set 'mode' to 0." abort_workflow fi } # To get the number of slopes for the simulation get_nslope() { ns=1 if [ -f "startfi.nc" ]; then ns=$(ncdump -h startfi.nc | sed -n 's/.*nslope = \([0-9]*\) ;.*/\1/p') else for f in run_pcm.def callphys.def; do if [[ -f "$f" ]]; then while IFS= read -r line; do # Remove leading whitespace trimmed=$(echo "$line" | sed 's/^[[:space:]]*//') # Skip lines that are commented out if [[ "$trimmed" == \#* ]]; then continue fi # Check if line contains 'nslope = N' if [[ "$trimmed" =~ ^nslope[[:space:]]*=[[:space:]]*([0-9]+) ]]; then ns="${BASH_REMATCH[1]}" break fi done < "$f" [[ -n "$ns" ]] && break fi done fi } # To modify the xml file according nslope config_xios_outputs() { tmp="tmp_file_def.xml" in_outdaily4pem=false in_outyearly4pem=false in_outdaily4pem_s=false in_outyearly4pem_s=false sed -i 's/enabled="\.true\.\">/enabled=".false.">/g' file_def_physics_mars.xml while IFS= read -r line; do # Detect file blocks case "$line" in *'' else line=' enabled=".false.">' fi elif $in_outdaily4pem_s || $in_outyearly4pem_s; then if [[ $ns -eq 1 ]]; then line=' enabled=".false.">' else line=' enabled=".true.">' fi fi fi # Handle slope variables if ( $in_outdaily4pem_s || $in_outyearly4pem_s ) && [[ $line =~ slope([0-9]+) ]]; then slope_id="${BASH_REMATCH[1]}" if (( 10#$slope_id > ns )); then # Ensure the line is commented if [[ $line != "" fi else # Ensure the line is uncommented if [[ $line == "}" # remove trailing --> fi fi fi # Leaving the file block case "$line" in *''*) in_outdaily4pem=false in_outyearly4pem=false in_outdaily4pem_s=false in_outyearly4pem_s=false ;; esac echo "$line" >> "$tmp" done < file_def_physics_mars.xml mv "$tmp" file_def_physics_mars.xml } # To check if a PCM run is one year check_run_yearly() { if [ -f "startfi.nc" ]; then year_sol=$(ncdump -v controle startfi.nc 2>/dev/null | \ sed -n '/controle =/,/;/p' | tr -d '[:space:]' | \ sed 's/.*=//; s/;//' | tr ',' '\n' | sed -n '14p') else echo "Warning: no \"startfi.nc\" found! So default year_sol=669 (Mars year) is taken..." year_sol=669 # Length of Martian year (sols) fi sol_in_file=$(awk -F'=' '/^[[:space:]]*(nday|ndt)[[:space:]]*=/ { val=$2 gsub(/^[[:space:]]+|[[:space:]]+$/,"",val) print val exit }' run_pcm.def) if [ -z "$sol_in_file" ]; then echo "Error: no length of year found in \"run_pcm.def\"!" abort_workflow elif [ "$sol_in_file" -eq "$year_sol" ]; then # Good: we do nothing : else echo "Error: length of year mismatch between \"run_pcm.def\" ($sol_in_file) and \"startfi.nc\" ($year_sol)!" abort_workflow fi } # To check the "callphys.def" compatibility to run with the PEM # arg1: callphys key check_callphys_key() { key="$1" line=$(grep -E "^[[:space:]]*${key}[[:space:]]*=" callphys.def \ | grep -v '^[[:space:]]*#' \ | tail -n 1) if [[ -z "$line" ]]; then echo "Error: the key '$key' is missing in \"callphys.def\"!" abort_workflow fi # Remove inline comments, extract value, normalize value=$(echo "$line" \ | cut -d'#' -f1 \ | cut -d'=' -f2 \ | tr -d '[:space:]' \ | tr '[:upper:]' '[:lower:]') if [[ "$value" == "true" || "$value" == ".true." ]]; then return 0 else echo "Error: the key '$key' must be true in \"callphys.def\"!" abort_workflow fi } # To check if everything necessary for the workflow script is ok check_workflow() { # Save the current value of LC_NUMERIC and set it to a locale that uses a dot as the decimal separator OLD_LC_NUMERIC=$LC_NUMERIC LC_NUMERIC=en_US.UTF-8 if [ -v n_planetary_years ] && [ ! -z "$n_planetary_years" ]; then if [ $(echo "$n_planetary_years <= 0." | bc -l) -eq 1 ]; then echo "Error: 'n_planetary_years' must be > 0!" abort_workflow fi elif [ -v n_earth_years ] && [ ! -z "$n_earth_years" ]; then if [ $(echo "$n_earth_years <= 0." | bc -l) -eq 1 ]; then echo "Error: 'n_earth_years' must be > 0!" abort_workflow fi else echo "Error: the number of years to be simulated is not set!" abort_workflow fi if [ $n_pcm_runs_ini -lt 2 ] || [ -z "$n_pcm_runs_ini" ]; then echo "Error: 'n_pcm_runs_ini' must be >= 2!" abort_workflow fi if [ $n_pcm_runs -lt 2 ] || [ -z "$n_pcm_runs" ]; then echo "Error: 'n_pcm_runs' must be >= 2!" abort_workflow fi if [ ! -f "pcm_run.job" ]; then echo "Error: file \"pcm_run.job\" does not exist in $dir!" abort_workflow fi if [ ! -f "pem_run.job" ]; then echo "Error: file \"pem_run.job\" does not exist in $dir!" abort_workflow fi if [ ! -f "run_pcm.def" ]; then echo "Error: file \"run_pcm.def\" does not exist in $dir!" abort_workflow fi if [ ! -f "run_pem.def" ]; then echo "Error: file \"run_pem.def\" does not exist in $dir!" abort_workflow fi if [ ! -f "callphys.def" ]; then echo "Error: file \"callphys.def\" does not exist in $dir!" abort_workflow fi if [ ! -f "z2sig.def" ]; then echo "Error: file \"z2sig.def\" does not exist in $dir!" abort_workflow fi if [ ! -f "traceur.def" ]; then echo "Error: file \"traceur.def\" does not exist in $dir!" abort_workflow fi if [ ! -f "context_lmdz_physics.xml" ]; then echo "Error: file \"context_lmdz_physics.xml\" does not exist in $dir!" abort_workflow fi if [ ! -f "field_def_physics_mars.xml" ]; then echo "Error: file \"field_def_physics_mars.xml\" does not exist in $dir!" abort_workflow fi if [ ! -f "file_def_physics_mars.xml" ]; then echo "Error: file \"file_def_physics_mars.xml\" does not exist in $dir!" abort_workflow fi if [ ! -f "iodef.xml" ]; then echo "Error: file \"iodef.xml\" does not exist in $dir!" abort_workflow fi if [ ! -f "obl_ecc_lsp.asc" ]; then echo "Warning: file \"obl_ecc_lsp.asc\" has not been found in $dir!" fi if [ ! -d "logs" ]; then mkdir logs fi if [ ! -d "starts" ]; then mkdir starts fi if [ ! -d "diags" ]; then mkdir diags fi if [ $exec_mode -ne 0 ]; then detect_scheduler fi require_cmd ncdump # Check if the "callphys.def" is compatible with the PEM check_callphys_key paleoclimate # Set automatically the XIOS output file for the PEM according to the number of slopes get_nslope config_xios_outputs # Check if a PCM run is one year check_run_yearly } # To convert Earth years into Mars years convert_earth2plnt_years() { myear=686.9725 # Number of Earth days in Martian year eyear=365.256363004 # Number of days in Earth year r_plnt2earth_yr=$(echo "$myear/$eyear" | bc -l) r_plnt2earth_yr=$(printf "%.4f" $r_plnt2earth_yr) # Rounding to the 4th decimal to respect the precision of Martian year if [ -v n_planetary_years ]; then ntot_yr_sim=$n_planetary_years echo "Number of years to be simulated: $ntot_yr_sim Martian years." elif [ -v n_earth_years ]; then ntot_yr_sim=$(echo "$n_earth_years/$r_plnt2earth_yr" | bc -l) echo "Number of years to be simulated: $n_earth_years Earth years = $ntot_yr_sim Martian years." fi } # To initialize the workflow script ini_workflow() { echo "This is a chained simulation for PEM and PCM runs in $dir on $machine by $user." convert_earth2plnt_years n_yr_sim=0. i_pem_run=1 i_pcm_run=1 if [ -f "startfi.nc" ]; then cp startfi.nc starts/ fi if [ -f "start.nc" ]; then cp start.nc starts/ elif [ -f "start1D.txt" ]; then cp start1D.txt starts/ fi if [ -f "startevo.nc" ]; then cp startevo.nc starts/ fi # Create a file to manage years of the chained simulation and store some info from the PEM runs echo $n_yr_sim $ntot_yr_sim $r_plnt2earth_yr $i_pcm_run $i_pem_run $n_pcm_runs $n_pcm_runs_ini > pem_workflow.sts } # To submit the PCM runs # arg1: execution mode # arg2: number of PCM runs to submit # arg3: local number of the PCM run from which to start (optional) submit_pcm_phase() { find . -type f -name "pcm_run*.job" ! -name "pcm_run.job" -delete ii=1 if [ ! -z $3 ]; then ii=$3 fi if [ $(echo "$n_yr_sim < $ntot_yr_sim" | bc -l) -eq 1 ]; then echo "Run \"PCM $i_pcm_run\" ($ii/$2)" if [ $1 -eq 0 ]; then # Mode: processing scripts sed -i "s/^k=-\?[0-9]\+$/k=$(echo "$ii - $2 + 2" | bc)/" pcm_run.job ./pcm_run.job if [ $? -ne 0 ]; then abort_workflow fi else # Mode: submitting jobs cp pcm_run.job pcm_run${i_pcm_run}.job sed -i -E "/^$job_name/s/(.*[^0-9])([0-9]+)(_[^0-9]*)?$/\1${i_pcm_run}\3/" pcm_run${i_pcm_run}.job sed -i "s/^k=-\?[0-9]\+$/k=$(echo "$ii - $2 + 2" | bc)/" pcm_run${i_pcm_run}.job if [[ "$job_scheduler" == "SLURM" ]]; then jobID=$(sbatch --parsable pcm_run${i_pcm_run}.job) elif [[ "$job_scheduler" == "PBS" ]]; then jobID=$(qsub pcm_run${i_pcm_run}.job | cut -d. -f1) fi # Create a file to cancel the dependent jobs of the cycle echo "#!/bin/bash" > kill_pem_workflow.sh chmod +x kill_pem_workflow.sh echo $kill_job $jobID >> kill_pem_workflow.sh fi ((i_pcm_run++)) ((ii++)) else end_workflow fi for ((i = $ii; i <= $2; i++)); do if [ $(echo "$n_yr_sim < $ntot_yr_sim" | bc -l) -eq 1 ]; then echo "Run \"PCM $i_pcm_run\" ($i/$2)" if [ $1 -eq 0 ]; then # Mode: processing scripts sed -i "s/^k=-\?[0-9]\+$/k=$(echo "$i - $2 + 2" | bc)/" pcm_run.job ./pcm_run.job if [ $? -ne 0 ]; then abort_workflow fi else # Mode: submitting jobs cp pcm_run.job pcm_run${i_pcm_run}.job sed -i -E "/^$job_name/s/(.*[^0-9])([0-9]+)(_[^0-9]*)?$/\1${i_pcm_run}\3/" pcm_run${i_pcm_run}.job sed -i "s/^k=-\?[0-9]\+$/k=$(echo "$i - $2 + 2" | bc)/" pcm_run${i_pcm_run}.job if [[ "$job_scheduler" == "SLURM" ]]; then jobID=$(sbatch --parsable --dependency=afterok:${jobID} pcm_run${i_pcm_run}.job) elif [[ "$job_scheduler" == "PBS" ]]; then jobID=$(qsub -W depend=afterok:${jobID} pcm_run${i_pcm_run}.job | cut -d. -f1) fi echo $kill_job $jobID >> kill_pem_workflow.sh fi ((i_pcm_run++)) else end_workflow fi done } # To submit the PEM run # arg1: execution mode submit_pem_phase() { if [ $(echo "$n_yr_sim < $ntot_yr_sim" | bc -l) -eq 1 ]; then echo "Run \"PEM $i_pem_run\"" if [ $1 -eq 0 ]; then # Mode: processing scripts ./pem_run.job if [ $? -ne 0 ]; then abort_workflow fi else # Mode: submitting jobs sed -i -E "/^$job_name/s/(.*[^0-9])([0-9]+)(_[^0-9]*)?$/\1${i_pem_run}\3/" pem_run.job if [[ "$job_scheduler" == "SLURM" ]]; then jobID=$(sbatch --parsable pem_run.job) elif [[ "$job_scheduler" == "PBS" ]]; then jobID=$(qsub pem_run.job | cut -d. -f1) fi # Create a file to cancel the dependent jobs of the cycle echo "#!/bin/bash" > kill_pem_workflow.sh chmod +x kill_pem_workflow.sh echo $kill_job $jobID >> kill_pem_workflow.sh fi else end_workflow fi } # To make one cycle of PCM and PEM runs # arg1: execution mode # arg2: number of PCM runs to submit # arg3: local number of the PCM run from which to start (optional) submit_cycle() { # PCM runs submit_pcm_phase $1 $2 $3 # PEM run if [ $(echo "$n_yr_sim < $ntot_yr_sim" | bc -l) -eq 1 ]; then echo "Run \"PEM $i_pem_run\"" if [ $1 -eq 0 ]; then # Mode: processing scripts ./pem_run.job if [ $? -ne 0 ]; then abort_workflow fi else # Mode: submitting jobs sed -i -E "/^$job_name/s/(.*[^0-9])([0-9]+)(_[^0-9]*)?$/\1${i_pem_run}\3/" pem_run.job if [[ "$job_scheduler" == "SLURM" ]]; then jobID=$(sbatch --parsable --dependency=afterok:${jobID} pem_run.job) elif [[ "$job_scheduler" == "PBS" ]]; then jobID=$(qsub -W depend=afterok:${jobID} pem_run.job | cut -d. -f1) fi echo $kill_job $jobID >> kill_pem_workflow.sh fi else end_workflow fi } # To clean-up files after resuming # arg1: file name prefix to clean # arg2: file name extension to clean # arg3: file number from which to clean cleanup() { prefix=$1 extension=$2 if [ -z "$extension" ]; then for file in ${prefix}*; do num=${file#$prefix} if [[ $num =~ ^[0-9]+$ ]] && [ $num -gt $3 ]; then rm $file fi done else for file in ${prefix}*${extension}; do num=${file#$prefix} num=${num%$extension} if [[ $num =~ ^[0-9]+$ ]] && [ $num -gt $3 ]; then rm $file fi done fi } # To resume workflow from PCM run # arg1: execution mode resume_from_pcm_run() { i_pcm_run=$(($i_resume + 1)) cleanup diags/diagfi .nc $i_resume cleanup diags/diagsoil .nc $i_resume cleanup diags/xoutdaily4pem .nc $i_resume cleanup diags/xoutyearly4pem .nc $i_resume cleanup logs/run_pcm .log $i_resume cleanup starts/restart1D .txt $i_resume cleanup starts/restart .nc $i_resume cleanup starts/restartfi .nc $i_resume cp starts/restartfi${i_resume}.nc startfi.nc if [ -f "starts/restart${i_resume}.nc" ]; then cp starts/restart${i_resume}.nc start.nc elif [ -f "starts/restart1D${i_resume}.txt" ]; then cp starts/restart1D${i_resume}.txt start1D.txt fi if [ $i_resume -le $n_pcm_runs_ini ]; then # PCM resumption during the initialization cycle i_pem_run=1 n_yr_sim=0 sed -i "1s/.*/$n_yr_sim $ntot_yr_sim $r_plnt2earth_yr $i_pcm_run $i_pem_run $n_pcm_runs $n_pcm_runs_ini/" pem_workflow.sts cleanup diags/diagevo .nc $(($i_pem_run - 1)) cleanup diags/diagevo_soil .nc $(($i_pem_run - 1)) cleanup logs/run_pem .log $(($i_pem_run - 1)) cleanup starts/restart1D_postpem .txt $(($i_pem_run - 1)) cleanup starts/restart_postpem .nc $(($i_pem_run - 1)) cleanup starts/restartfi_postpem .nc $(($i_pem_run - 1)) cleanup starts/restartevo .nc $(($i_pem_run - 1)) rm -f startevo.nc if [ -f "starts/startevo.nc" ]; then cp starts/startevo.nc . fi if [ $i_resume -eq $(($n_pcm_runs_ini - 1)) ]; then cp diags/xoutdaily4pem${i_resume}.nc xoutdaily4pem_y1.nc cp diags/xoutyearly4pem${i_resume}.nc xoutyearly4pem_y1.nc submit_cycle $1 $n_pcm_runs_ini $i_pcm_run elif [ $i_resume -eq $n_pcm_runs_ini ]; then cp diags/xoutdaily4pem$(($i_resume - 1)).nc xoutdaily4pem_y1.nc cp diags/xoutyearly4pem$(($i_resume - 1)).nc xoutyearly4pem_y1.nc cp diags/xoutdaily4pem${i_resume}.nc xoutdaily4pem_y2.nc cp diags/xoutyearly4pem${i_resume}.nc xoutyearly4pem_y2.nc submit_pem_phase $1 # The next job is a PEM run else submit_cycle $1 $n_pcm_runs_ini $i_pcm_run fi else # PCM resumption during a cycle i_pem_run=$(echo "($i_pcm_run - $n_pcm_runs_ini)/$n_pcm_runs + 1" | bc) il=$(echo "($i_resume - $n_pcm_runs_ini + 1)%$n_pcm_runs + 1" | bc) n_yr_sim=$(awk "NR==$i_pem_run {printf \"%s\n\", \$3}" "pem_workflow.sts") sed -i "1s/.*/$n_yr_sim $ntot_yr_sim $r_plnt2earth_yr $i_pcm_run $i_pem_run $n_pcm_runs $n_pcm_runs_ini/" pem_workflow.sts cleanup diags/diagevo .nc $(($i_pem_run - 1)) cleanup diags/diagevo_soil .nc $(($i_pem_run - 1)) cleanup logs/run_pem .log $(($i_pem_run - 1)) cleanup starts/restart1D_postpem .txt $(($i_pem_run - 1)) cleanup starts/restart_postpem .nc $(($i_pem_run - 1)) cleanup starts/restartfi_postpem .nc $(($i_pem_run - 1)) cleanup starts/restartevo .nc $(($i_pem_run - 1)) cp starts/restartevo$(($i_pem_run - 1)).nc startevo.nc if [ $il -eq $(($n_pcm_runs - 1)) ]; then # Second to last PCM run cp diags/xoutdaily4pem${i_resume}.nc xoutdaily4pem_y1.nc cp diags/xoutyearly4pem${i_resume}.nc xoutyearly4pem_y1.nc submit_cycle $1 $n_pcm_runs $(($il + 1)) elif [ $il -eq $n_pcm_runs ]; then # Last PCM run so the next job is a PEM run cp diags/xoutdaily4pem$(($i_resume - 1)).nc xoutdaily4pem_y1.nc cp diags/xoutyearly4pem$(($i_resume - 1)).nc xoutyearly4pem_y1.nc cp diags/xoutdaily4pem${i_resume}.nc xoutdaily4pem_y2.nc cp diags/xoutyearly4pem${i_resume}.nc xoutyearly4pem_y2.nc submit_pem_phase $1 else submit_cycle $1 $n_pcm_runs $(($il + 1)) fi fi } # To resume workflow from PEM run # arg1: execution mode resume_from_pem_run() { i_pem_run=$(echo "$i_resume + 1" | bc) i_pcm_run=$(echo "$n_pcm_runs_ini + $n_pcm_runs*($i_resume - 1) + 1" | bc) n_yr_sim=$(awk "NR==$i_pem_run {printf \"%s\n\", \$3}" "pem_workflow.sts") sed -i "1s/.*/$n_yr_sim $ntot_yr_sim $r_plnt2earth_yr $i_pcm_run $i_pem_run $n_pcm_runs $n_pcm_runs_ini/" pem_workflow.sts cleanup diags/diagfi .nc $(($i_pcm_run - 1)) cleanup diags/diagsoil .nc $(($i_pcm_run - 1)) cleanup logs/run_pcm .log $(($i_pcm_run - 1)) cleanup starts/restart1D .txt $(($i_pcm_run - 1)) cleanup starts/restart .nc $(($i_pcm_run - 1)) cleanup starts/restartfi .nc $(($i_pcm_run - 1)) cleanup diags/xoutdaily4pem .nc $(($i_pcm_run - 1)) cleanup diags/xoutyearly4pem .nc $(($i_pcm_run - 1)) cleanup diags/diagevo .nc $i_resume cleanup diags/diagevo_soil .nc $i_resume cleanup logs/run_pem .log $i_resume cleanup starts/restart1D_postpem .txt $i_resume cleanup starts/restart_postpem .nc $i_resume cleanup starts/restartfi_postpem .nc $i_resume cleanup starts/restartevo .nc $i_resume cp starts/restartevo${i_resume}.nc startevo.nc cp starts/restartfi_postpem${i_resume}.nc startfi.nc if [ -f "starts/restart_postpem${i_resume}.nc" ]; then cp starts/restart_postpem${i_resume}.nc start.nc elif [ -f "starts/restart1D_postpem${i_resume}.txt" ]; then cp starts/restart1D_postpem${i_resume}.txt start1D.txt fi submit_cycle $1 $n_pcm_runs }