source: BOL/LMDZ_Setup_amaury/lmdz_env.sh @ 5178

Last change on this file since 5178 was 5178, checked in by abarral, 2 months ago

Fix libfabric crash on ADS
Increase rebuild job times

File size: 5.9 KB
Line 
1#!/bin/bash
2########################################################
3# This file loads the required modules and sets the paths for simulations
4# To modify the paths:
5#    1) In the <set_env> function, find the section corresponding to your machine
6#    2) Modify the variables, which are documented in the *) section of <set_env>
7# See the end of <set_env> for the explanation of each
8########################################################
9
10# <root_dir> will be set by sed by setup.sh here
11root_dir=/home/abarral/PycharmProjects/installLMDZ/LMDZ_Setup
12
# Detect which machine we are running on and store a platform name in <hostname>.
# Globals written: hostname ("adastra", "jean-zay", or the output of the hostname command)
function get_hostname {
  # /etc/motd may not exist (e.g. on a local computer): silence grep's error in that case
  if grep -q "Adastra" /etc/motd 2> /dev/null; then
    hostname="adastra"
  elif command -v idrquota &> /dev/null; then  # the idrquota command is taken as the sign we are on Jean Zay
    hostname="jean-zay"
  else
    hostname=$(hostname)
  fi
}
22
# Load the required modules and define the paths and launch commands for the current machine.
# Globals read:    hostname (its first 5 characters select the platform section), root_dir,
#                  and the variables used in the paths below (SCRATCH, SCRATCHDIR, WORKDIR, HOME)
# Globals written: ARCH, SIMRUNBASEDIR, LMDZD, LMDZ_INIT, NB_MPI_MAX, NB_OMP_MAX, MPICMD,
#                  RUNBASHCMD, SUBMITCMD, and, where a section sets them, account, compilo,
#                  NB_CORE_PER_NODE_MAX, N_HYPERTHREADING
# Each variable is explained in the *) section at the end.
function set_env {  # Platform-specific
  case ${hostname:0:5} in
    jean-)
      module purge
      compilo=19.0.4 # available 2013.0, 2017.2
      module load intel-compilers/$compilo
      #module load intel-mpi/$compilo
      module load intel-mkl/$compilo
      module load hdf5/1.10.5-mpi
      module load netcdf/4.7.2-mpi
      module load netcdf-fortran/4.5.2-mpi
      module load subversion/1.9.7
      # For the gcc module, see: https://trac.lmd.jussieu.fr/LMDZ/wiki/PortageJeanZay
      #module load gcc/6.5.0
      module load nco
      module load cdo
      # Resource usage is charged to the default active group (project):
      #   idrproj shows the default active group (project)
      #   idrproj -d newproj   makes "newproj" the active project
      #        ($STORE, $WORK etc. will then point to the spaces of "newproj")
      account="lmd"  # $(idrproj | grep active | awk '{ print $1}') doesn't work on compute nodes
      ARCH="X64_JEANZAY"
      SIMRUNBASEDIR="$SCRATCH/"
      LMDZD="$root_dir/LMDZD"
      LMDZ_INIT="$root_dir/LMDZ_Init"
      NB_MPI_MAX=2000
      NB_OMP_MAX=20
      # NOTE(review): NB_CORE_PER_NODE_MAX and N_HYPERTHREADING are not set in this section,
      #   unlike the other platforms - confirm this is intended
      MPICMD="srun -n"
      RUNBASHCMD="srun -A $account@cpu --label -n 1 -c"
      SUBMITCMD="sbatch -A $account@cpu"
      ;;
    spiri)
      module purge
      module load subversion/1.13.0
      module load gcc/11.2.0
      module load openmpi/4.0.7
      module load cdo/2.3.0

      ARCH="X64_MESOIPSL-GNU"
      SIMRUNBASEDIR="$SCRATCH/"
      LMDZD="$root_dir/LMDZD"
      LMDZ_INIT="$HOME/LMDZ_Init"
      NB_MPI_MAX=5
      NB_OMP_MAX=1
      NB_CORE_PER_NODE_MAX=0
      N_HYPERTHREADING=1
      MPICMD="mpirun -np"  # on spirit, we can't run MPI using srun from within sbatch
      RUNBASHCMD="bash"
      SUBMITCMD="sbatch"
      ;;
    adast)
      module purge
      module load PrgEnv-gnu  # we need to load the env because lmdz links some shared libraries
      module load gcc/13.2.0  # required, see https://dci.dci-gitlab.cines.fr/webextranet/user_support/index.html#prgenv-and-compilers
      export CRAY_CPU_TARGET=x86-64  # to suppress warnings during Cmake netcdf95 build
      export FI_CXI_RX_MATCH_MODE=hybrid  # 09/24 otherwise we get random SIGABRT e.g. "libfabric:2490616:1725895288::cxi:core:cxip_ux_onload_cb():2657<warn> c1456: RXC (0x5130:21) PtlTE 84:[Fatal] LE resources not recovered during flow control. FI_CXI_RX_MATCH_MODE=[hybrid|software] is required"

      # The two wrappers below remove themselves, load the environment the tool needs, run the
      # tool, then call set_env again to restore the LMDZ environment (and re-create the wrappers).
      # They return the exit status of the tool itself, not the one of set_env.
      function cdo {  # cdo is available as a spack cmd which requires a specific, incompatible env
        local rc=0
        unset -f cdo  # -f: remove this wrapper function, so that the next call reaches the actual command
        module purge
        module load develop GCC-CPU-4.0.0 cdo/2.4.2-omp-mpi
        cdo "$@" || rc=$?
        set_env
        return "$rc"
      }

      function ferret {
        local rc=0
        unset -f ferret  # -f: remove this wrapper function, so that the next call reaches the actual command
        module purge
        module load GCC-CPU-3.1.0
        module load ferret
        ferret "$@" || rc=$?
        set_env
        return "$rc"
      }

      account=$(/usr/sbin/my_project.py -l 2>&1 | head -1 | cut -d " " -f 3- | cut -c 5-)
      ARCH="X64_ADASTRA-GNU"
      SIMRUNBASEDIR="$SCRATCHDIR/"
      LMDZD="$root_dir/LMDZD"
      LMDZ_INIT="$WORKDIR/LMDZ_Init"
      NB_MPI_MAX=2000
      NB_OMP_MAX=200
      NB_CORE_PER_NODE_MAX=192
      N_HYPERTHREADING=1  # Adastra has SMT=2 enabled, but we found no actual performance improvement for the latlon model. Maybe useful for Dynamico ?
      MPICMD="srun -n"
#      RUNBASHCMD="srun --label --account=$account --constraint=GENOA --ntasks-per-node=1 -n 1 --time=00:15:00 -c"
      RUNBASHCMD="bash"  # On Adastra the docs says we can use login nodes for compilation
      SUBMITCMD="env $(env | grep -E "SLURM_|SBATCH_|SRUN_" | cut -d= -f1 | awk '{print "-u " $0}' | tr '\n' ' ' ) sbatch --constraint=GENOA --account=$account"  # we need to remove the existing SLURM variables otherwise they may be unexpectedly inherited by the submitted script
      ;;
    *) echo "WARNING: RUNNING THIS SCRIPT ON A LOCAL COMPUTER IS DISCOURAGED (lackluster cpus and memory)"
      ARCH="local-gfortran-parallel"  # The arch file to use
      SIMRUNBASEDIR="/tmp/SCRATCH/"  # Where the simulations will be executed ($SIMRUNBASEDIR/LMDZ_Setup/...)
      LMDZD="$root_dir/LMDZD"  # Where the sources will be downloaded and compiled
      LMDZ_INIT="$HOME/LMDZ_Init"  # Where to store shared files used for initialisation. Should be outside the LMDZ_Setup dir since it's shared between several LMDZ_Setup.
      NB_MPI_MAX=2  # Max number of MPI cores (only for running simulations)
      NB_OMP_MAX=2  # Max number of OMP threads (only for running simulations)
      NB_CORE_PER_NODE_MAX=0  # Max number of cores per node (real cores, not hyperthreading - only for running simulations, cluster-specific)
      N_HYPERTHREADING=1  # How many hyperthreading threads per physical core
      MPICMD="mpirun -np" # command to run an mpi executable, as $MPICMD <nprocs> <script>
      RUNBASHCMD="bash" # command to run a bash job, as $runbashcmd (nthreads) <script> [nthreads only supplied if =/="bash"]
      SUBMITCMD="."  # command to submit a job, as $submitcmd <script>
      ;;
  esac
}
126
# Detect the platform, then load the matching modules and paths
get_hostname
echo "Setting up lmdz_env on $hostname"
set_env

# Stop unless <root_dir> is a directory holding both the .lmdz_setup_root_dir marker and lmdz_env.sh
if [[ ! -d $root_dir || ! -f $root_dir/.lmdz_setup_root_dir || ! -f $root_dir/lmdz_env.sh ]]; then
  echo "STOP: root_dir $root_dir not found, either you are running on an unsupported cluster, or the initialisation failed midway"
  exit 1
fi
Note: See TracBrowser for help on using the repository browser.