Ignore:
Timestamp:
May 15, 2025, 6:26:24 PM (3 months ago)
Author:
jbclement
Message:

Mars PCM:
Cleanup and robustness improvements for the "run0" and "run_month1" scripts: added checks for required files and clearer error messages.
JBC

Location:
trunk/LMDZ.MARS/deftank/adastra
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/LMDZ.MARS/deftank/adastra/run0

    r3033 r3766  
    11#!/bin/bash
    2 ###########################################################################
     2########################################################################
    33# Script to perform several chained LMD Mars GCM simulations
    44# SET HERE the maximum total number of simulations
     
    99gcm=gcm_64x48x54_phymars_para.e
    1010#
    11 ###########################################################################
     11########################################################################
    1212set -x
    1313
    1414echo "---------------------------------------------------------"
    15 echo "starting run0"
     15echo "Starting run0..."
    1616
    1717dir=`pwd`
     
    1919address=`whoami`
    2020
    21 # Look for file "num_run" which should contain
     21# Check if GCM executable exists and is executable
     22if [ ! -x $gcm ]; then
     23  echo "Error: file \"$gcm\" not found or not executable in $dir!"
     24  exit 1
     25fi
     26
     27# Look for file "num_run" which should contain
    2228# the value of the previously computed season
    2329# (defaults to 0 if file "num_run" does not exist)
    2430if [[ -r num_run ]] ; then
    25   echo "found file num_run"
     31  echo "Found file \"num_run\""
    2632  numold=`cat num_run`
    2733else
    2834  numold=0
    2935fi
    30 echo "numold is set to" ${numold}
     36echo "numold is set to" $numold
    3137
    32 
    33 # Set value of current season
    34 (( numnew = ${numold} + 1 ))
    35 echo "numnew is set to" ${numnew}
     38# Set value of current season
     39numnew=$((numold + 1 ))
     40echo "numnew is set to" $numnew
    3641
    3742# Look for initialization data files (exit if none found)
     
    4045   \cp -f startfi${numold}.nc startfi.nc
    4146else
    42    if (( ${numold} == 99999 )) ; then
    43     echo "No run because previous run crashed ! (99999 in num_run)"
    44     exit
     47   if (( numold == 99999 )) ; then
     48     echo "Error: no run because previous run crashed! (99999 in \"num_run\")"
     49     exit 1
    4550   else
    46    echo "Where is file start"${numold}".nc??"
    47    exit
     51     echo "Error: missing input files \"start${numold}.nc\" or \"startfi${numold}.nc\" in $dir!"
     52     exit 1
    4853   fi
    4954fi
     
    5156
    5257# Run GCM
    53 srun --cpu-bind=threads --label -c${OMP_NUM_THREADS:=1} $gcm > lrun${numnew} 2>&1 
     58srun --cpu-bind=threads --label -c${OMP_NUM_THREADS:=1} $gcm > lrun${numnew} 2>&1
    5459
    5560
    5661# Check if run ended normaly and copy datafiles
    5762if [[ ( -r restartfi.nc  &&  -r restart.nc ) ]] ; then
    58   echo "Run seems to have ended normaly"
     63  echo "Run seems to have ended normally."
    5964  \mv -f restartfi.nc startfi${numnew}.nc
    6065  \mv -f restart.nc start${numnew}.nc
    6166else
     67  echo "Error: Run crashed or incomplete output!"
    6268  if [[ -r num_run ]] ; then
    6369    \mv -f num_run num_run.crash
    6470  else
    65     echo "No file num_run to build num_run.crash from !!"
     71    echo "No file num_run to build \"num_run.crash\" from!"
    6672    # Impose a default value of 0 for num_run
    6773    echo 0 > num_run.crash
    6874  fi
    69  echo 99999 > num_run
     75  echo 99999 > num_run
    7076############## To receive an Email message if the run crashes ########
    71 mail -s "crash run GCM" $address <<ENDMAIL
    72 The run on $machine in $dir has just crashed.
     77mail -s "Crash in GCM run" $address <<ENDMAIL
     78The GCM run on $machine in $dir has just crashed.
     79Check the output logs for more information.
    7380ENDMAIL
    74 ############################################""
    75  exit
     81#############################################
     82  exit 1
    7683fi
    7784
     
    93100# Prepare things for upcoming runs by writing
    94101# value of computed season in file num_run
    95 echo ${numnew} > num_run
     102echo $numnew > num_run
    96103
    97104# If we are over nummax : stop
    98105if (( $numnew + 1 > $nummax )) ; then
    99    exit
     106  exit 0
    100107else
    101    \cp -f run0 exe_mars
    102    ./exe_mars
    103 fi 
     108  \cp -f run0 exe_mars
     109  ./exe_mars
     110fi
  • trunk/LMDZ.MARS/deftank/adastra/run_month1

    r3033 r3766  
    44#SBATCH --constraint=GENOA
    55#SBATCH --nodes=1
    6 #SBATCH --ntasks-per-node=24 
     6#SBATCH --ntasks-per-node=24
    77#SBATCH --cpus-per-task=4
    88#SBATCH --threads-per-core=1 # --hint=nomultithread
    99#SBATCH --exclusive
    1010#SBATCH --output=run_month1_%A.out
    11 #SBATCH --time=01:25:00 
     11#SBATCH --time=01:25:00
    1212
    13 
     13## Script to run chained simulations
     14## (uses script "run0" and reference file "run.def.ref")
     15## Set values of "num_now" and "num_end" in the script below
     16## to set initial month # and final month # of the simulation
    1417
    1518# A few parameters that might need be changed depending on your setup:
    1619# Path to the arch.env to source
    1720source ../trunk/LMDZ.COMMON/arch.env
    18 # Number of threads to use (must be the same as "#SBATCH --cpus-per-task=" above) 
    19 export OMP_NUM_THREADS=4
     21# Number of threads to use (must be the same as "#SBATCH --cpus-per-task=" above)
     22export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
    2023export OMP_STACKSIZE=400M
     24#
     25########################################################################
     26set -exv
     27ls -al
     28trap 'echo -e "Error at line $LINENO!"' ERR
    2129
    22 set -xv
    23 ls -al
    24 
    25 ## set starting month and ending month below:
     30# Set starting month and ending month below:
    2631num_now=1
    2732num_end=12
    28 (( num_previous = $num_now - 1 ))
     33num_previous=$(( num_now - 1 ))
    2934
    3035echo "$num_previous" > num_run
    3136# next month number
    32 (( num_next = $num_now + 1 ))
     37num_next=$(( num_now + 1 ))
    3338# true (i.e. modulo 12) month number
    34 (( true_num = $num_now % 12 ))
     39true_num=$(( num_now % 12 ))
    3540
     41# Check if required files exist
    3642\rm -f  error; touch error
    3743
     44if [ ! -f run.def.ref ]; then
     45  echo "Error: file \"run.def.ref\" not found in current directory!" > error
     46  exit 1
     47fi
     48
     49if [ ! -x run0 ]; then
     50  echo "Error: file \"run0\" not found or not executable in current directory!" > error
     51  exit 1
     52fi
     53
     54if [ ! -f run_month$num_now ]; then
     55  echo "Error: file \"run_month${num_now}\" not found in current directory!" > error
     56  exit 1
     57fi
     58
     59# Run model depending on current month
    3860case $true_num in
    39   1 ) sed s/9999/61/ run.def.ref > run.def ; run0 >> error ;;    #1
    40   2 ) sed s/9999/66/ run.def.ref > run.def ; run0 >> error ;;    #2
    41   3 ) sed s/9999/66/ run.def.ref > run.def ; run0 >> error ;;    #3
    42   4 ) sed s/9999/65/ run.def.ref > run.def ; run0 >> error ;;    #4
    43   5 ) sed s/9999/60/ run.def.ref > run.def ; run0 >> error ;;    #5
    44   6 ) sed s/9999/54/ run.def.ref > run.def ; run0 >> error ;;    #6
    45   7 ) sed s/9999/50/ run.def.ref > run.def ; run0 >> error ;;    #7
    46   8 ) sed s/9999/46/ run.def.ref > run.def ; run0 >> error ;;    #8
    47   9 ) sed s/9999/47/ run.def.ref > run.def ; run0 >> error ;;    #9
    48  10 ) sed s/9999/47/ run.def.ref > run.def ; run0 >> error ;;    #10
    49  11 ) sed s/9999/51/ run.def.ref > run.def ; run0 >> error ;;    #11
    50   0 ) sed s/9999/56/ run.def.ref > run.def ; run0 >> error ;;    #12
    51   * ) echo "error" ;;
     61  1 ) sed s/9999/61/ run.def.ref > run.def ; ./run0 >> error ;;    #1
     62  2 ) sed s/9999/66/ run.def.ref > run.def ; ./run0 >> error ;;    #2
     63  3 ) sed s/9999/66/ run.def.ref > run.def ; ./run0 >> error ;;    #3
     64  4 ) sed s/9999/65/ run.def.ref > run.def ; ./run0 >> error ;;    #4
     65  5 ) sed s/9999/60/ run.def.ref > run.def ; ./run0 >> error ;;    #5
     66  6 ) sed s/9999/54/ run.def.ref > run.def ; ./run0 >> error ;;    #6
     67  7 ) sed s/9999/50/ run.def.ref > run.def ; ./run0 >> error ;;    #7
     68  8 ) sed s/9999/46/ run.def.ref > run.def ; ./run0 >> error ;;    #8
     69  9 ) sed s/9999/47/ run.def.ref > run.def ; ./run0 >> error ;;    #9
     70 10 ) sed s/9999/47/ run.def.ref > run.def ; ./run0 >> error ;;    #10
     71 11 ) sed s/9999/51/ run.def.ref > run.def ; ./run0 >> error ;;    #11
     72  0 ) sed s/9999/56/ run.def.ref > run.def ; ./run0 >> error ;;    #12
     73  * ) echo "Error: Invalid value of true_num ($true_num)" ; exit 1 ;;
    5274esac
    5375
    54 # launch job for next month
    55 if (( $num_next <= $num_end )) ; then
     76# Launch job for next month
     77if (( num_next <= num_end )) ; then
    5678  cp -f run_month$num_now tmp
    5779  sed -e "s@run_month${num_now}@run_month${num_next}@" \
    5880      -e "s@num_now=${num_now}@num_now=${num_next}@" tmp > run_month$num_next
    5981  rm tmp
    60  
    6182  sbatch run_month$num_next
    6283fi
Note: See TracChangeset for help on using the changeset viewer.