Ignore:
Timestamp:
May 15, 2025, 6:26:24 PM (3 months ago)
Author:
jbclement
Message:

Mars PCM:
Cleaning and improvement of robustness for "run0" and "run_month1" scripts with file checks and clearer errors.
JBC

Location:
trunk/LMDZ.MARS/deftank/occigen
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/LMDZ.MARS/deftank/occigen/run0

    r2216 r3766  
    11#!/bin/bash
    2 ###########################################################################
     2########################################################################
    33# Script to perform several chained LMD Mars GCM simulations
    44# SET HERE the maximum total number of simulations
     
    99gcm=gcm_64x48x73_phymars_para.e
    1010#
    11 ###########################################################################
     11########################################################################
    1212set -x
    1313
    1414echo "---------------------------------------------------------"
    15 echo "starting run0"
     15echo "Starting run0..."
    1616
    1717dir=`pwd`
     
    1919address=`whoami`
    2020
    21 # Look for file "num_run" which should contain
     21# Check if GCM executable exists and is executable
     22if [ ! -x $gcm ]; then
     23  echo "Error: file \"$gcm\" not found or not executable in $dir!"
     24  exit 1
     25fi
     26
     27# Look for file "num_run" which should contain
    2228# the value of the previously computed season
    2329# (defaults to 0 if file "num_run" does not exist)
    2430if [[ -r num_run ]] ; then
    25   echo "found file num_run"
     31  echo "Found file \"num_run\""
    2632  numold=`cat num_run`
    2733else
    2834  numold=0
    2935fi
    30 echo "numold is set to" ${numold}
     36echo "numold is set to" $numold
    3137
    32 
    33 # Set value of current season
    34 (( numnew = ${numold} + 1 ))
    35 echo "numnew is set to" ${numnew}
     38# Set value of current season
     39numnew=$((numold + 1 ))
     40echo "numnew is set to" $numnew
    3641
    3742# Look for initialization data files (exit if none found)
     
    4045   \cp -f startfi${numold}.nc startfi.nc
    4146else
    42    if (( ${numold} == 99999 )) ; then
    43     echo "No run because previous run crashed ! (99999 in num_run)"
    44     exit
     47   if (( numold == 99999 )) ; then
     48     echo "Error: no run because previous run crashed! (99999 in \"num_run\")"
     49     exit 1
    4550   else
    46    echo "Where is file start"${numold}".nc??"
    47    exit
     51     echo "Error: missing input files \"start${numold}.nc\" or \"startfi${numold}.nc\" in $dir!"
     52     exit 1
    4853   fi
    4954fi
     
    5661# Check if run ended normally and copy datafiles
    5762if [[ ( -r restartfi.nc  &&  -r restart.nc ) ]] ; then
    58   echo "Run seems to have ended normaly"
     63  echo "Run seems to have ended normally."
    5964  \mv -f restartfi.nc startfi${numnew}.nc
    6065  \mv -f restart.nc start${numnew}.nc
    6166else
     67  echo "Error: Run crashed or incomplete output!"
    6268  if [[ -r num_run ]] ; then
    6369    \mv -f num_run num_run.crash
    6470  else
    65     echo "No file num_run to build num_run.crash from !!"
     71    echo "No file num_run to build \"num_run.crash\" from!"
    6672    # Impose a default value of 0 for num_run
    6773    echo 0 > num_run.crash
    6874  fi
    69  echo 99999 > num_run
     75  echo 99999 > num_run
    7076############## To receive an Email message if the run crashes ########
    71 mail -s "crash run GCM" $address <<ENDMAIL
    72 The run on $machine in $dir has just crashed.
     77mail -s "Crash in GCM run" $address <<ENDMAIL
     78The GCM run on $machine in $dir has just crashed.
     79Check the output logs for more information.
    7380ENDMAIL
    74 ############################################""
    75  exit
     81#############################################
     82  exit 1
    7683fi
    7784
     
    93100# Prepare things for upcoming runs by writing
    94101# value of computed season in file num_run
    95 echo ${numnew} > num_run
     102echo $numnew > num_run
    96103
    97104# If we are over nummax : stop
    98105if (( $numnew + 1 > $nummax )) ; then
    99    exit
     106  exit 0
    100107else
    101    \cp -f run0 exe_mars
    102    ./exe_mars
    103 fi 
     108  \cp -f run0 exe_mars
     109  ./exe_mars
     110fi
  • trunk/LMDZ.MARS/deftank/occigen/run_month1

    r2644 r3766  
    11#!/bin/bash
    2 #SBATCH --nodes=6                          #A
    3 #SBATCH --ntasks-per-node=4                #B
    4 #SBATCH --cpus-per-task=5                  #C
     2#SBATCH --nodes=6                           #A
     3#SBATCH --ntasks-per-node=4                 #B
     4#SBATCH --cpus-per-task=5                   #C
    55#SBATCH -J clim
    66#SBATCH --time=13:00:00
     
    1010#SBATCH --exclusive
    1111
    12 ## Script to run chained simulations on Occigen
     12## Script to run chained simulations
    1313## (uses script "run0" and reference file "run.def.ref")
    1414## Set values of "num_now" and "num_end" in the script below
    1515## to set initial month # and final month # of the simulation
    1616
    17 # source the environment (you might need to adapt this path to your case)
    18 source ../trunk/LMDZ.COMMON/arch/arch-X64_OCCIGEN.env
    19 
    20 set -xv
    21 ls -al
    22 
    23 ## set starting month and ending month below:
    24 num_now=1
    25 num_end=12
    26 (( num_previous = $num_now - 1 ))
    27 
    28 echo "$num_previous" > num_run
    29 # next month number
    30 (( num_next = $num_now + 1 ))
    31 # true (i.e. modulo 12) month number
    32 (( true_num = $num_now % 12 ))
    33 
    34 \rm -f  error; touch error
    35 
    36 # environment setup for OpenMP:
     17# A few parameters that might need to be changed depending on your setup:
     18# Path to the arch.env to source
     19source ../trunk/LMDZ.COMMON/arch.env
     20# Environment setup for OpenMP:
    3721export I_MPI_DOMAIN=auto
    3822export I_MPI_PIN_RESPECT_CPUSET=0
    3923#Make sure that OMP_NUM_THREADS = cpus-per-task * KMP_HW_SUBSET
    4024export KMP_HW_SUBSET=1T
    41 export OMP_NUM_THREADS=5                    #C
     25export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK #C
    4226export OMP_STACKSIZE=400M
    4327export KMP_AFFINITY=granularity=fine,compact,1,0,verbose
     28#
     29########################################################################
     30set -exv
     31ls -al
     32trap 'echo -e "Error at line $LINENO!"' ERR
    4433
     34# Set starting month and ending month below:
     35num_now=1
     36num_end=12
     37num_previous=$(( num_now - 1 ))
     38
     39echo "$num_previous" > num_run
     40# next month number
     41num_next=$(( num_now + 1 ))
     42# true (i.e. modulo 12) month number
     43true_num=$(( num_now % 12 ))
     44
     45# Check if required files exist
     46\rm -f  error; touch error
     47
     48if [ ! -f run.def.ref ]; then
     49  echo "Error: file \"run.def.ref\" not found in current directory!" > error
     50  exit 1
     51fi
     52
     53if [ ! -x run0 ]; then
     54  echo "Error: file \"run0\" not found or not executable in current directory!" > error
     55  exit 1
     56fi
     57
     58if [ ! -f run_month$num_now ]; then
     59  echo "Error: file \"run_month${num_now}\" not found in current directory!" > error
     60  exit 1
     61fi
     62
     63# Run model depending on current month
    4564case $true_num in
    46   1 ) sed s/9999/61/ run.def.ref > run.def ; run0 >> error ;;    #1
    47   2 ) sed s/9999/66/ run.def.ref > run.def ; run0 >> error ;;    #2
    48   3 ) sed s/9999/66/ run.def.ref > run.def ; run0 >> error ;;    #3
    49   4 ) sed s/9999/65/ run.def.ref > run.def ; run0 >> error ;;    #4
    50   5 ) sed s/9999/60/ run.def.ref > run.def ; run0 >> error ;;    #5
    51   6 ) sed s/9999/54/ run.def.ref > run.def ; run0 >> error ;;    #6
    52   7 ) sed s/9999/50/ run.def.ref > run.def ; run0 >> error ;;    #7
    53   8 ) sed s/9999/46/ run.def.ref > run.def ; run0 >> error ;;    #8
    54   9 ) sed s/9999/47/ run.def.ref > run.def ; run0 >> error ;;    #9
    55  10 ) sed s/9999/47/ run.def.ref > run.def ; run0 >> error ;;    #10
    56  11 ) sed s/9999/51/ run.def.ref > run.def ; run0 >> error ;;    #11
    57   0 ) sed s/9999/56/ run.def.ref > run.def ; run0 >> error ;;    #12
    58   * ) echo "error" ;;
     65  1 ) sed s/9999/61/ run.def.ref > run.def ; ./run0 >> error ;;    #1
     66  2 ) sed s/9999/66/ run.def.ref > run.def ; ./run0 >> error ;;    #2
     67  3 ) sed s/9999/66/ run.def.ref > run.def ; ./run0 >> error ;;    #3
     68  4 ) sed s/9999/65/ run.def.ref > run.def ; ./run0 >> error ;;    #4
     69  5 ) sed s/9999/60/ run.def.ref > run.def ; ./run0 >> error ;;    #5
     70  6 ) sed s/9999/54/ run.def.ref > run.def ; ./run0 >> error ;;    #6
     71  7 ) sed s/9999/50/ run.def.ref > run.def ; ./run0 >> error ;;    #7
     72  8 ) sed s/9999/46/ run.def.ref > run.def ; ./run0 >> error ;;    #8
     73  9 ) sed s/9999/47/ run.def.ref > run.def ; ./run0 >> error ;;    #9
     74 10 ) sed s/9999/47/ run.def.ref > run.def ; ./run0 >> error ;;    #10
     75 11 ) sed s/9999/51/ run.def.ref > run.def ; ./run0 >> error ;;    #11
     76  0 ) sed s/9999/56/ run.def.ref > run.def ; ./run0 >> error ;;    #12
     77  * ) echo "Error: Invalid value of true_num ($true_num)" ; exit 1 ;;
    5978esac
    6079
    61 # launch job for next month
    62 if (( $num_next <= $num_end )) ; then
     80# Launch job for next month
     81if (( num_next <= num_end )) ; then
    6382  cp -f run_month$num_now tmp
    6483  sed -e "s@run_month${num_now}@run_month${num_next}@" \
    6584      -e "s@num_now=${num_now}@num_now=${num_next}@" tmp > run_month$num_next
    6685  rm tmp
    67  
    6886  sbatch run_month$num_next
    6987fi
    70 
Note: See TracChangeset for help on using the changeset viewer.