#!/bin/bash
########################################################################
######## Library of bash functions for the PEM workflow script #########
########################################################################

# Terminate the workflow script successfully.
# Globals: OLD_LC_NUMERIC (read), LC_NUMERIC (written)
# Outputs: the current date, then a success message, on stdout
# Exits the shell with status 0; it never returns to its caller.
end_workflow() {
    LC_NUMERIC=$OLD_LC_NUMERIC # put back the numeric locale kept in OLD_LC_NUMERIC
    date
    printf '%s\n' "Success: the PEM workflow script completed normally!"
    exit 0
}

# Terminate the workflow script after a failure.
# Globals: OLD_LC_NUMERIC (read), LC_NUMERIC (written)
# Outputs: the current date, then an error message, on stdout
# Exits the shell with status 1; it never returns to its caller.
abort_workflow() {
    LC_NUMERIC=$OLD_LC_NUMERIC # put back the numeric locale kept in OLD_LC_NUMERIC
    date
    printf '%s\n' "Error: an issue occured in the PEM workflow script!"
    exit 1
}

# Find out which job scheduler is available and record how to talk to it.
# SLURM is probed first (through "squeue"), then PBS/TORQUE (through "qstat").
# Globals: machine (read, only used in messages)
#          job_scheduler, job_name, kill_job (written)
# Outputs: one informative line on stdout, or two error lines before aborting
# Calls abort_workflow when neither scheduler command is found.
detect_scheduler() {
    local found="none"

    if command -v squeue > /dev/null 2>&1; then
        found="SLURM"
    elif command -v qstat > /dev/null 2>&1; then
        found="PBS"
    fi

    case "$found" in
        SLURM)
            echo "SLURM is installed on $machine."
            job_scheduler="SLURM"
            job_name="#SBATCH --job-name=" # directive prefix carrying the job name in a job file
            kill_job="scancel"
            ;;
        PBS)
            echo "PBS/TORQUE is installed on $machine."
            job_scheduler="PBS"
            job_name="#PBS -N " # directive prefix carrying the job name in a job file
            kill_job="qdel"
            ;;
        *)
            echo "Error: neither SLURM nor TORQUE/PBS is installed on $machine!"
            echo "You need to adapt the script to your job job_scheduler or set 'mode' to 0."
            abort_workflow
            ;;
    esac
}

# To get the number of slopes for the simulation
# The value is looked up, in this order:
#   1. in the header of "startfi.nc" (through ncdump) when that file exists;
#   2. otherwise in "run_pcm.def", then "callphys.def": the first
#      non-commented line of the form "nslope = N" wins.
# When nothing usable is found, the default of 1 is kept.
# Globals: ns (written, always a plain decimal integer)
get_nslope() {
    local found="" def_file line trimmed

    ns=1
    if [ -f "startfi.nc" ]; then
        found=$(ncdump -h startfi.nc | sed -n 's/.*nslope = \([0-9]*\) ;.*/\1/p')
        found=${found%%$'\n'*} # keep the first match only
    else
        for def_file in run_pcm.def callphys.def; do
            [[ -f "$def_file" ]] || continue
            # "|| [[ -n ... ]]" also processes a last line that lacks a trailing newline
            while IFS= read -r line || [[ -n "$line" ]]; do
                # Remove leading whitespace
                trimmed="${line#"${line%%[![:space:]]*}"}"
                # Skip lines that are commented out
                [[ "$trimmed" == \#* ]] && continue
                # Check if line contains 'nslope = N'
                if [[ "$trimmed" =~ ^nslope[[:space:]]*=[[:space:]]*([0-9]+) ]]; then
                    found="${BASH_REMATCH[1]}"
                    break
                fi
            done < "$def_file"
            # Stop at the first file that actually defines nslope. Testing "ns"
            # here would always succeed because of its default value, and the
            # second file would never be read.
            [[ -n "$found" ]] && break
        done
    fi

    # Keep the default unless a number was really found; 10# drops leading zeros
    if [[ "$found" =~ ^[0-9]+$ ]]; then
        ns=$((10#$found))
    fi
}

# To modify the XIOS xml file according to nslope
# Rewrites "file_def_physics_mars.xml" (in the current directory) so that:
#   - every 'enabled=".true.">' is first switched to 'enabled=".false.">';
#   - inside the "outdaily4pem"/"outyearly4pem" file blocks the "enabled" line
#     is set to .true. only when there is a single slope;
#   - inside the "outdaily4pem_s"/"outyearly4pem_s" file blocks the "enabled"
#     line is set to .true. only when there are several slopes, and every line
#     mentioning "slopeNN" is commented out when NN > nslope, uncommented
#     otherwise.
# Globals: ns (read; anything that is not a plain integer counts as 0)
# The result is built in "tmp_file_def.xml" and then moved over the original.
config_xios_outputs() {
    local xml_file="file_def_physics_mars.xml"
    local tmp="tmp_file_def.xml"
    local in_flat=false  # inside an outdaily4pem / outyearly4pem block
    local in_slope=false # inside an outdaily4pem_s / outyearly4pem_s block
    local line indent content slope_id
    local nslope=0

    if [[ "${ns:-}" =~ ^[0-9]+$ ]]; then
        nslope=$((10#$ns))
    fi

    sed -i 's/enabled="\.true\.\">/enabled=".false.">/g' "$xml_file"

    # The whole loop writes to the temporary file through a single truncating
    # redirection, so leftovers of an interrupted run can never end up in the
    # result. "|| [[ -n ... ]]" keeps a last line without trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Detect file blocks
        case "$line" in
            *'<file id="outdaily4pem"'* | *'<file id="outyearly4pem"'*)
                in_flat=true
                ;;
            *'<file id="outdaily4pem_s"'* | *'<file id="outyearly4pem_s"'*)
                in_slope=true
                ;;
        esac

        # Handle enabled attribute
        if [[ $line == *'enabled="'* ]]; then
            if $in_flat; then
                if ((nslope == 1)); then
                    line='              enabled=".true.">'
                else
                    line='              enabled=".false.">'
                fi
            elif $in_slope; then
                if ((nslope == 1)); then
                    line='              enabled=".false.">'
                else
                    line='              enabled=".true.">'
                fi
            fi
        fi

        # Handle slope variables
        if $in_slope && [[ $line =~ slope([0-9]+) ]]; then
            slope_id="${BASH_REMATCH[1]}"
            # Split the indentation from the content so that indented lines are
            # recognised as comments too and keep their indentation
            indent="${line%%[![:space:]]*}"
            content="${line#"$indent"}"
            content="${content%"${content##*[![:space:]]}"}" # drop trailing blanks
            if ((10#$slope_id > nslope)); then
                # Ensure the line is commented (never wrap a comment twice)
                if [[ $content != "<!--"* ]]; then
                    line="${indent}<!-- ${content} -->"
                fi
            elif [[ $content == "<!--"*"-->" ]]; then
                # Ensure the line is uncommented
                content="${content#<!--}"
                content="${content%-->}"
                content="${content#"${content%%[![:space:]]*}"}"
                content="${content%"${content##*[![:space:]]}"}"
                line="${indent}${content}"
            fi
        fi

        # Leaving the file block
        case "$line" in
            *'</file>'*)
                in_flat=false
                in_slope=false
                ;;
        esac

        printf '%s\n' "$line"
    done < "$xml_file" > "$tmp"

    mv "$tmp" "$xml_file"
}

# Verify that one PCM run covers exactly one year.
# The reference year length is the 14th value of the "controle" variable
# dumped from "startfi.nc"; without that file 669 is used. It is compared with
# the first "nday = ..." (or "ndt = ...") entry found in "run_pcm.def".
# Globals: year_sol, sol_in_file (written)
# Calls abort_workflow when the entry is missing or when the comparison does
# not succeed.
check_run_yearly() {
    if [ ! -f "startfi.nc" ]; then
        echo "Warning: no \"startfi.nc\" found! So default year_sol=669 (Mars year) is taken..."
        year_sol=669 # Length of Martian year (sols)
    else
        # Isolate the "controle = ... ;" statement, squeeze it onto one line,
        # strip the name and the terminator, then pick the 14th value
        year_sol=$(ncdump -v controle startfi.nc 2>/dev/null \
            | sed -n '/controle =/,/;/p' \
            | tr -d '[:space:]' \
            | sed 's/.*=//; s/;//' \
            | tr ',' '\n' \
            | sed -n '14p')
    fi

    # Value (trimmed of surrounding blanks) of the first nday/ndt assignment
    sol_in_file=$(awk -F'=' '/^[[:space:]]*(nday|ndt)[[:space:]]*=/ {
                  val=$2
                  gsub(/^[[:space:]]+|[[:space:]]+$/,"",val)
                  print val
                  exit
                  }' run_pcm.def)

    if [ -z "$sol_in_file" ]; then
        echo "Error: no length of year found in \"run_pcm.def\"!"
        abort_workflow
    elif ! [ "$sol_in_file" -eq "$year_sol" ]; then
        echo "Error: length of year mismatch between \"run_pcm.def\" ($sol_in_file) and \"startfi.nc\" ($year_sol)!"
        abort_workflow
    fi
}

# To check if everything necessary for the workflow script is ok
# Checks, in this order: the requested number of years, the numbers of PCM
# runs, the presence of the job/def/xml files, the output directories (created
# when missing), the job scheduler (unless exec_mode is 0), then adapts the
# XIOS outputs to the number of slopes and checks the length of a PCM run.
# Globals: n_planetary_years, n_earth_years, n_pcm_runs_ini, n_pcm_runs,
#          exec_mode, dir (read); OLD_LC_NUMERIC, LC_NUMERIC (written)
# Calls abort_workflow as soon as one check fails.
check_workflow() {
    local required

    # Save the current value of LC_NUMERIC and set it to a locale that uses a dot as the decimal separator
    # NOTE(review): this relies on the "en_US.UTF-8" locale being installed - confirm on the target machines
    OLD_LC_NUMERIC=$LC_NUMERIC
    LC_NUMERIC=en_US.UTF-8

    if [ -v n_planetary_years ] && [ -n "$n_planetary_years" ]; then
        if [ "$(echo "$n_planetary_years <= 0." | bc -l)" -eq 1 ]; then
            echo "Error: 'n_planetary_years' must be > 0!"
            abort_workflow
        fi
    elif [ -v n_earth_years ] && [ -n "$n_earth_years" ]; then
        if [ "$(echo "$n_earth_years <= 0." | bc -l)" -eq 1 ]; then
            echo "Error: 'n_earth_years' must be > 0!"
            abort_workflow
        fi
    else
        echo "Error: the number of years to be simulated is not set!"
        abort_workflow
    fi

    # The format is validated before any numeric comparison: an empty or
    # non-integer value is rejected cleanly instead of triggering a test error
    if ! [[ "$n_pcm_runs_ini" =~ ^[0-9]+$ ]] || ((10#$n_pcm_runs_ini < 2)); then
        echo "Error: 'n_pcm_runs_ini' must be >= 2!"
        abort_workflow
    fi
    if ! [[ "$n_pcm_runs" =~ ^[0-9]+$ ]] || ((10#$n_pcm_runs < 2)); then
        echo "Error: 'n_pcm_runs' must be >= 2!"
        abort_workflow
    fi

    # Files that must be present in the current directory
    for required in pcm_run.job pem_run.job run_pcm.def run_pem.def \
        context_lmdz_physics.xml field_def_physics_mars.xml \
        file_def_physics_mars.xml iodef.xml; do
        if [ ! -f "$required" ]; then
            echo "Error: file \"$required\" does not exist in $dir!"
            abort_workflow
        fi
    done

    # Output directories, created on demand
    for required in logs starts diags; do
        if [ ! -d "$required" ]; then
            mkdir "$required"
        fi
    done

    if [ "$exec_mode" -ne 0 ]; then
        detect_scheduler
    fi
    # Set automatically the XIOS output file for the PEM according to the number of slopes
    get_nslope
    config_xios_outputs
    # Check if a PCM run is one year
    check_run_yearly
}

# To convert Earth years into Mars years
# Computes the Martian/Earth year length ratio and the total number of
# Martian years to simulate, taken from 'n_planetary_years' when it is set and
# non-empty, otherwise converted from 'n_earth_years'.
# Globals: n_planetary_years, n_earth_years (read)
#          myear, eyear, r_plnt2earth_yr, ntot_yr_sim (written)
# Outputs: one line on stdout stating the number of years to be simulated
convert_earth2plnt_years() {
    myear=686.9725      # Number of Earth days in Martian year
    eyear=365.256363004 # Number of days in Earth year
    r_plnt2earth_yr=$(echo "$myear/$eyear" | bc -l)
    r_plnt2earth_yr=$(printf "%.4f" "$r_plnt2earth_yr") # Rounding to the 4th decimal to respect the precision of Martian year
    # A variable that is set but empty counts as "not given", exactly like in
    # check_workflow, so that the other unit is used instead of an empty total
    if [ -v n_planetary_years ] && [ -n "$n_planetary_years" ]; then
        ntot_yr_sim=$n_planetary_years
        echo "Number of years to be simulated: $ntot_yr_sim Martian years."
    elif [ -v n_earth_years ] && [ -n "$n_earth_years" ]; then
        ntot_yr_sim=$(echo "$n_earth_years/$r_plnt2earth_yr" | bc -l)
        echo "Number of years to be simulated: $n_earth_years Earth years = $ntot_yr_sim Martian years."
    fi
}

# Set up a brand new chained simulation.
# Prints a banner, converts the requested duration into Martian years, resets
# the run counters, saves the initial start files into "starts/" and creates
# the status file "pem_workflow.sts".
# Globals: dir, machine, user, n_pcm_runs, n_pcm_runs_ini (read)
#          n_yr_sim, i_pem_run, i_pcm_run (written)
#          ntot_yr_sim, r_plnt2earth_yr (read after convert_earth2plnt_years)
ini_workflow() {
    echo "This is a chained simulation for PEM and PCM runs in $dir on $machine by $user."
    convert_earth2plnt_years

    # Nothing simulated yet, and both run counters start at one
    n_yr_sim=0.
    i_pem_run=1
    i_pcm_run=1

    # Keep a copy of whichever initial start files are available
    [ -f "startfi.nc" ] && cp startfi.nc starts/
    if [ -f "start.nc" ]; then
        cp start.nc starts/
    elif [ -f "start1D.txt" ]; then
        cp start1D.txt starts/
    fi
    [ -f "startpem.nc" ] && cp startpem.nc starts/

    # First line of the status file: it drives the years of the chained
    # simulation; info from the PEM runs is stored in the same file
    echo $n_yr_sim $ntot_yr_sim $r_plnt2earth_yr $i_pcm_run $i_pem_run $n_pcm_runs $n_pcm_runs_ini > pem_workflow.sts
}

# Private helper: run (mode 0) or submit (other modes) a single PCM run.
# arg1: execution mode
# arg2: local number of the PCM run within the phase
# arg3: number of PCM runs of the phase
# arg4: "first" for a job without dependency (the kill script is recreated),
#       "chained" for a job depending on the job whose ID is in jobID
# Globals: i_pcm_run, job_name, job_scheduler, kill_job (read), jobID (read/written)
_submit_one_pcm_run() {
    local mode=$1 local_idx=$2 n_runs=$3 link=$4
    # Value written on the "k=" line of the job file: 2 for the last run of
    # the phase, 1 for the second to last, and so on downwards
    local k_value=$((local_idx - n_runs + 2))
    local job_file previous_job

    echo "Run \"PCM $i_pcm_run\" ($local_idx/$n_runs)"
    if [ "$mode" -eq 0 ]; then # Mode: processing scripts
        sed -i "s/^k=-\?[0-9]\+$/k=${k_value}/" pcm_run.job
        ./pcm_run.job || abort_workflow
        return
    fi

    # Mode: submitting jobs
    job_file="PCMrun${i_pcm_run}.job"
    cp pcm_run.job "$job_file"
    # Put the global run number in place of the number ending the job name
    sed -i -E "/^$job_name/s/(.*[^0-9])([0-9]+)(_[^0-9]*)?$/\1${i_pcm_run}\3/" "$job_file"
    sed -i "s/^k=-\?[0-9]\+$/k=${k_value}/" "$job_file"

    previous_job=$jobID
    jobID="" # so that a failed submission cannot go unnoticed
    if [[ "$job_scheduler" == "SLURM" ]]; then
        if [[ "$link" == "chained" ]]; then
            jobID=$(sbatch --parsable --dependency=afterok:"${previous_job}" "$job_file")
        else
            jobID=$(sbatch --parsable "$job_file")
        fi
    elif [[ "$job_scheduler" == "PBS" ]]; then
        if [[ "$link" == "chained" ]]; then
            jobID=$(qsub -W depend=afterok:"${previous_job}" "$job_file" | cut -d. -f1)
        else
            jobID=$(qsub "$job_file" | cut -d. -f1)
        fi
    fi
    # Without a job ID, neither the dependency chain nor the kill script can
    # be built: stop here instead of submitting broken dependent jobs
    if [ -z "$jobID" ]; then
        echo "Error: the submission of \"PCM $i_pcm_run\" failed (no job ID returned)!"
        abort_workflow
    fi

    if [[ "$link" != "chained" ]]; then
        # Create a file to cancel the dependent jobs of the cycle
        echo "#!/bin/bash" > kill_pem_workflow.sh
        chmod +x kill_pem_workflow.sh
    fi
    echo "$kill_job $jobID" >> kill_pem_workflow.sh
}

# To submit the PCM runs
# arg1: execution mode
# arg2: number of PCM runs to submit
# arg3: local number of the PCM run from which to start (optional)
# The first run is always launched without dependency, the following ones are
# chained on the previous job. Previous "PCMrun*.job" copies are deleted first.
# Globals: n_yr_sim, ntot_yr_sim (read); i_pcm_run, ii, i, jobID (written)
# Calls end_workflow once the requested number of years is reached.
submit_pcm_phase() {
    find . -type f -name "PCMrun*.job" ! -name "pcm_run.job" -delete
    ii=1
    if [ -n "$3" ]; then
        ii=$3
    fi
    if [ "$(echo "$n_yr_sim < $ntot_yr_sim" | bc -l)" -eq 1 ]; then
        _submit_one_pcm_run "$1" "$ii" "$2" first
        ((i_pcm_run++))
        ((ii++))
    else
        end_workflow
    fi
    for ((i = ii; i <= $2; i++)); do
        if [ "$(echo "$n_yr_sim < $ntot_yr_sim" | bc -l)" -eq 1 ]; then
            _submit_one_pcm_run "$1" "$i" "$2" chained
            ((i_pcm_run++))
        else
            end_workflow
        fi
    done
}

# To submit the PEM run
# arg1: execution mode
# The PEM job is launched without dependency on any other job, and the kill
# script "kill_pem_workflow.sh" is recreated for it.
# Globals: n_yr_sim, ntot_yr_sim, i_pem_run, job_name, job_scheduler,
#          kill_job (read); jobID (written)
# Calls end_workflow once the requested number of years is reached, and
# abort_workflow when the run or its submission fails.
submit_pem_phase() {
    if [ "$(echo "$n_yr_sim < $ntot_yr_sim" | bc -l)" -eq 1 ]; then
        echo "Run \"PEM $i_pem_run\""
        if [ "$1" -eq 0 ]; then # Mode: processing scripts
            ./pem_run.job || abort_workflow
        else # Mode: submitting jobs
            # Put the PEM run number in place of the number ending the job name
            sed -i -E "/^$job_name/s/(.*[^0-9])([0-9]+)(_[^0-9]*)?$/\1${i_pem_run}\3/" pem_run.job
            jobID="" # so that a stale ID can never be mistaken for this job
            if [[ "$job_scheduler" == "SLURM" ]]; then
                jobID=$(sbatch --parsable pem_run.job)
            elif [[ "$job_scheduler" == "PBS" ]]; then
                jobID=$(qsub pem_run.job | cut -d. -f1)
            fi
            # Without a job ID the kill script would be useless: stop here
            if [ -z "$jobID" ]; then
                echo "Error: the submission of \"PEM $i_pem_run\" failed (no job ID returned)!"
                abort_workflow
            fi
            # Create a file to cancel the dependent jobs of the cycle
            echo "#!/bin/bash" > kill_pem_workflow.sh
            chmod +x kill_pem_workflow.sh
            echo "$kill_job $jobID" >> kill_pem_workflow.sh
        fi
    else
        end_workflow
    fi
}

# To make one cycle of PCM and PEM runs
# arg1: execution mode
# arg2: number of PCM runs to submit
# arg3: local number of the PCM run from which to start (optional)
# The PEM job is made dependent on the job whose ID is left in jobID by the
# PCM phase, and its own ID is appended to "kill_pem_workflow.sh".
# Globals: n_yr_sim, ntot_yr_sim, i_pem_run, job_name, job_scheduler,
#          kill_job (read); jobID (read/written)
# Calls end_workflow once the requested number of years is reached, and
# abort_workflow when the run or its submission fails.
submit_cycle() {
    local previous_job

    # PCM runs (the optional third argument is forwarded only when given)
    submit_pcm_phase "$1" "$2" ${3:+"$3"}

    # PEM run
    if [ "$(echo "$n_yr_sim < $ntot_yr_sim" | bc -l)" -eq 1 ]; then
        echo "Run \"PEM $i_pem_run\""
        if [ "$1" -eq 0 ]; then # Mode: processing scripts
            ./pem_run.job || abort_workflow
        else # Mode: submitting jobs
            # Put the PEM run number in place of the number ending the job name
            sed -i -E "/^$job_name/s/(.*[^0-9])([0-9]+)(_[^0-9]*)?$/\1${i_pem_run}\3/" pem_run.job
            previous_job=$jobID
            jobID="" # so that a failed submission cannot go unnoticed
            if [[ "$job_scheduler" == "SLURM" ]]; then
                jobID=$(sbatch --parsable --dependency=afterok:"${previous_job}" pem_run.job)
            elif [[ "$job_scheduler" == "PBS" ]]; then
                jobID=$(qsub -W depend=afterok:"${previous_job}" pem_run.job | cut -d. -f1)
            fi
            # Without a job ID the kill script would get a useless entry: stop here
            if [ -z "$jobID" ]; then
                echo "Error: the submission of \"PEM $i_pem_run\" failed (no job ID returned)!"
                abort_workflow
            fi
            echo "$kill_job $jobID" >> kill_pem_workflow.sh
        fi
    else
        end_workflow
    fi
}

# To clean-up files after resuming
# arg1: file name prefix to clean
# arg2: file name extension to clean (may be empty)
# arg3: file number from which to clean
# Removes every file named <prefix><number><extension> whose number is
# strictly greater than arg3. Files whose middle part is not purely numeric
# are left alone.
cleanup() {
    local prefix=$1 extension=$2 threshold=$3
    local file num

    # Prefix and extension are quoted so that they are taken literally (spaces
    # and glob characters included); only the "*" in between is a wildcard.
    # With an empty extension this is simply "<prefix>*".
    for file in "${prefix}"*"${extension}"; do
        num=${file#"$prefix"}
        num=${num%"$extension"}
        # When nothing matches, the pattern itself is iterated over: its
        # middle part is "*", which is not a number, so nothing is removed
        if [[ $num =~ ^[0-9]+$ ]] && [ "$num" -gt "$threshold" ]; then
            rm -- "$file"
        fi
    done
}

# To resume workflow from PCM run
# arg1: execution mode
# Restarts the chained simulation right after PCM run number i_resume:
# outputs of later runs are removed, the start files are restored from
# "starts/", the first line of "pem_workflow.sts" is rewritten and the
# remaining runs of the current cycle are launched.
# Globals: i_resume, n_pcm_runs, n_pcm_runs_ini, ntot_yr_sim,
#          r_plnt2earth_yr (read); i_pcm_run, i_pem_run, n_yr_sim, il (written)
resume_from_pcm_run() {
    local in_init_cycle=false
    local spec

    i_pcm_run=$((i_resume + 1))

    # Remove what the PCM runs after i_resume produced ("prefix:extension")
    for spec in diags/diagfi:.nc diags/diagsoil:.nc diags/Xoutdaily4pem:.nc \
        diags/Xoutyearly4pem:.nc logs/runPCM:.log starts/restart1D:.txt \
        starts/restart:.nc starts/restartfi:.nc; do
        cleanup "${spec%%:*}" "${spec#*:}" "$i_resume"
    done

    # Restart from the state left by PCM run i_resume
    cp "starts/restartfi${i_resume}.nc" startfi.nc
    if [ -f "starts/restart${i_resume}.nc" ]; then
        cp "starts/restart${i_resume}.nc" start.nc
    elif [ -f "starts/restart1D${i_resume}.txt" ]; then
        cp "starts/restart1D${i_resume}.txt" start1D.txt
    fi

    if [ "$i_resume" -le "$n_pcm_runs_ini" ]; then
        # PCM resumption during the initialization cycle
        in_init_cycle=true
        i_pem_run=1
        n_yr_sim=0
    else
        # PCM resumption during a cycle
        # The PCM runs after the initialization are grouped by n_pcm_runs:
        # with d = i_resume - n_pcm_runs_ini - 1 (0 for the first of them),
        # d / n_pcm_runs full cycles precede the current one, whose PEM run
        # therefore has number d / n_pcm_runs + 2, and i_resume is the
        # (d % n_pcm_runs + 1)-th PCM run of its cycle. These expressions
        # hold for any n_pcm_runs, not only for n_pcm_runs = 2.
        i_pem_run=$(((i_resume - n_pcm_runs_ini - 1) / n_pcm_runs + 2))
        il=$(((i_resume - n_pcm_runs_ini - 1) % n_pcm_runs + 1))
        # Third column of line number i_pem_run of the status file
        n_yr_sim=$(awk "NR==$i_pem_run {printf \"%s\n\", \$3}" "pem_workflow.sts")
    fi

    sed -i "1s/.*/$n_yr_sim $ntot_yr_sim $r_plnt2earth_yr $i_pcm_run $i_pem_run $n_pcm_runs $n_pcm_runs_ini/" pem_workflow.sts

    # Remove what the PEM runs from i_pem_run onwards produced
    for spec in diags/diagevol:.nc diags/diagevol_soil:.nc logs/runPEM:.log \
        starts/restart1D_postPEM:.txt starts/restart_postPEM:.nc \
        starts/restartfi_postPEM:.nc starts/restartpem:.nc; do
        cleanup "${spec%%:*}" "${spec#*:}" "$((i_pem_run - 1))"
    done

    if $in_init_cycle; then
        rm -f startpem.nc
        if [ -f "starts/startpem.nc" ]; then
            cp starts/startpem.nc .
        fi
        if [ "$i_resume" -eq "$((n_pcm_runs_ini - 1))" ]; then # Second to last PCM run
            cp "diags/Xoutdaily4pem${i_resume}.nc" Xoutdaily4pem_Y1.nc
            cp "diags/Xoutyearly4pem${i_resume}.nc" Xoutyearly4pem_Y1.nc
            submit_cycle "$1" "$n_pcm_runs_ini" "$i_pcm_run"
        elif [ "$i_resume" -eq "$n_pcm_runs_ini" ]; then # Last PCM run
            cp "diags/Xoutdaily4pem$((i_resume - 1)).nc" Xoutdaily4pem_Y1.nc
            cp "diags/Xoutyearly4pem$((i_resume - 1)).nc" Xoutyearly4pem_Y1.nc
            cp "diags/Xoutdaily4pem${i_resume}.nc" Xoutdaily4pem_Y2.nc
            cp "diags/Xoutyearly4pem${i_resume}.nc" Xoutyearly4pem_Y2.nc
            submit_pem_phase "$1" # The next job is a PEM run
        else
            submit_cycle "$1" "$n_pcm_runs_ini" "$i_pcm_run"
        fi
    else
        cp "starts/restartpem$((i_pem_run - 1)).nc" startpem.nc
        if [ "$il" -eq "$((n_pcm_runs - 1))" ]; then # Second to last PCM run
            cp "diags/Xoutdaily4pem${i_resume}.nc" Xoutdaily4pem_Y1.nc
            cp "diags/Xoutyearly4pem${i_resume}.nc" Xoutyearly4pem_Y1.nc
            submit_cycle "$1" "$n_pcm_runs" "$((il + 1))"
        elif [ "$il" -eq "$n_pcm_runs" ]; then # Last PCM run so the next job is a PEM run
            cp "diags/Xoutdaily4pem$((i_resume - 1)).nc" Xoutdaily4pem_Y1.nc
            cp "diags/Xoutyearly4pem$((i_resume - 1)).nc" Xoutyearly4pem_Y1.nc
            cp "diags/Xoutdaily4pem${i_resume}.nc" Xoutdaily4pem_Y2.nc
            cp "diags/Xoutyearly4pem${i_resume}.nc" Xoutyearly4pem_Y2.nc
            submit_pem_phase "$1"
        else
            submit_cycle "$1" "$n_pcm_runs" "$((il + 1))"
        fi
    fi
}

# Restart the chained simulation right after PEM run number i_resume.
# arg1: execution mode
# The run counters are recomputed, the first line of "pem_workflow.sts" is
# rewritten, outputs of later PCM/PEM runs are removed, the start files are
# restored from the post-PEM restarts in "starts/" and a new cycle is launched.
# Globals: i_resume, n_pcm_runs, n_pcm_runs_ini, ntot_yr_sim,
#          r_plnt2earth_yr (read); i_pem_run, i_pcm_run, n_yr_sim (written)
resume_from_pem_run() {
    local last_pcm item

    # Next PEM run, and first PCM run following PEM run i_resume
    i_pem_run=$((i_resume + 1))
    i_pcm_run=$((n_pcm_runs_ini + n_pcm_runs * (i_resume - 1) + 1))
    # Third column of line number i_pem_run of the status file
    n_yr_sim=$(awk "NR==$i_pem_run {printf \"%s\n\", \$3}" "pem_workflow.sts")
    sed -i "1s/.*/$n_yr_sim $ntot_yr_sim $r_plnt2earth_yr $i_pcm_run $i_pem_run $n_pcm_runs $n_pcm_runs_ini/" pem_workflow.sts

    # PCM outputs numbered above the last PCM run to keep ("prefix:extension")
    last_pcm=$((i_pcm_run - 1))
    for item in diags/diagfi:.nc diags/diagsoil:.nc logs/runPCM:.log \
        starts/restart1D:.txt starts/restart:.nc starts/restartfi:.nc \
        diags/Xoutdaily4pem:.nc diags/Xoutyearly4pem:.nc; do
        cleanup ${item%%:*} ${item#*:} $last_pcm
    done

    # PEM outputs numbered above i_resume
    for item in diags/diagevol:.nc diags/diagevol_soil:.nc logs/runPEM:.log \
        starts/restart1D_postPEM:.txt starts/restart_postPEM:.nc \
        starts/restartfi_postPEM:.nc starts/restartpem:.nc; do
        cleanup ${item%%:*} ${item#*:} $i_resume
    done

    # Start again from the state left by PEM run i_resume
    cp starts/restartpem${i_resume}.nc startpem.nc
    cp starts/restartfi_postPEM${i_resume}.nc startfi.nc
    if [ -f "starts/restart_postPEM${i_resume}.nc" ]; then
        cp starts/restart_postPEM${i_resume}.nc start.nc
    elif [ -f "starts/restart1D_postPEM${i_resume}.txt" ]; then
        cp starts/restart1D_postPEM${i_resume}.txt start1D.txt
    fi

    submit_cycle $1 $n_pcm_runs
}
