1 | #!/bin/bash |
---|
2 | # Adapted by A. Barral (07/2024) from https://dci.dci-gitlab.cines.fr/webextranet/porting_optimization/detailed_binding_script.html#porting-optimization-detailed-binding-script |
---|
3 | # Used within exclusive nodes as srun --cpu-bind=none --mem-bind=none -- ./set_binding.sh <executable> <args> |
---|
4 | # Note: On Adastra, OMP_PLACES & OMP_PROC_BIND seem to have no effect (probably because the default assignment is the same), but they are left here just in case for other configurations. |
---|
5 | |
---|
6 | set -eu |
---|
7 | |
---|
#######################################
# Build the per-task CPU binding tables for one node.
#
# Each task receives a contiguous run of physical cores. When the requested
# number of threads exceeds the number of physical cores, the additional SMT
# siblings of the same cores are used as well; sibling level i of core c is
# assumed to be numbered c + i * total_cpus.
#
# Globals:
#   AFFINITY_NUMACTL      (written) array, one "numactl --physcpubind" CPU
#                         list per task, e.g. "0-7" or "0-48,196-244".
#   OMP_PLACES_ARR        (written) array, one OMP_PLACES value per task in
#                         "{first}:count:stride" interval notation.
#   num_smt_used          (written) number of hardware threads used per core;
#                         deliberately global, it is printed by the caller.
#   BINDING_TOTAL_STRANDS (read, optional) total number of hardware threads
#                         of the node; defaults to 392.
# Arguments:
#   $1 - number of MPI tasks on the node
#   $2 - number of OpenMP threads per task
# Outputs:
#   Error messages on stderr.
# Returns:
#   Exits the shell with status 1 when the request is invalid or the node
#   topology cannot be determined.
#######################################
function create_affinity_numactl() {
  local num_tasks=${1-}   # Number of MPI tasks
  local num_threads=${2-} # Number of OMP threads per task

  AFFINITY_NUMACTL=()
  OMP_PLACES_ARR=()

  # Reject empty, zero, negative or non-numeric values up front: they would
  # otherwise end in a division by zero or a shell arithmetic syntax error.
  local positive_int='^[1-9][0-9]*$'
  if [[ ! $num_tasks =~ $positive_int || ! $num_threads =~ $positive_int ]]; then
    echo "STOP: number of tasks ('$num_tasks') and of OMP threads ('$num_threads') must be positive integers." >&2
    exit 1
  fi

  local total_strands thread_per_core
  # Total hardware threads in the system (overridable, 392 by default).
  total_strands=${BINDING_TOTAL_STRANDS:-392}
  # LC_ALL=C keeps the lscpu labels in English whatever the user locale is;
  # "+ 0" strips the padding and turns an unparsable value into 0.
  thread_per_core=$(LC_ALL=C lscpu \
    | awk -F: '/^Thread\(s\) per core:/ { print $2 + 0 }')
  if [[ ! $total_strands =~ $positive_int ]]; then
    echo "STOP: invalid total number of hardware threads ('$total_strands')." >&2
    exit 1
  fi
  if [[ ! $thread_per_core =~ $positive_int ]]; then
    echo "STOP: could not determine the number of threads per core from lscpu." >&2
    exit 1
  fi

  local total_cpus=$(( total_strands / thread_per_core )) # Total physical cores in the system
  if (( total_cpus < 1 )); then
    echo "STOP: inconsistent topology ($total_strands hardware threads, $thread_per_core threads per core)." >&2
    exit 1
  fi

  local total_threads=$(( num_tasks * num_threads ))
  # ceil(total_threads / total_cpus)
  num_smt_used=$(( (total_threads + total_cpus - 1) / total_cpus ))

  # Parameter range check
  if (( total_threads > total_strands )); then
    echo "STOP: requesting more CPUs than available on the system!" >&2
    exit 1
  fi
  if (( num_threads % num_smt_used != 0 )); then
    echo "STOP: OMP threads number ($num_threads) must be a multiple of $num_smt_used (using $num_smt_used out of $thread_per_core SMT threads per core)." >&2
    exit 1
  fi

  # Physical cores given to each task; its threads are spread evenly over
  # the num_smt_used sibling levels of those cores.
  local cores_per_task=$(( num_threads / num_smt_used ))
  local start_cpu=0
  local task i_smt first_cpu last_cpu cpu_list places
  for (( task = 0; task < num_tasks; task++ )); do
    cpu_list=""
    places=""
    for (( i_smt = 0; i_smt < num_smt_used; i_smt++ )); do
      first_cpu=$(( start_cpu + total_cpus * i_smt ))
      last_cpu=$(( first_cpu + cores_per_task - 1 ))
      cpu_list+=",${first_cpu}-${last_cpu}"
      places+=",{${first_cpu}}:${cores_per_task}:1"
    done
    # Drop the leading comma introduced by the first append.
    AFFINITY_NUMACTL+=("${cpu_list#,}")
    OMP_PLACES_ARR+=("${places#,}")
    start_cpu=$(( start_cpu + cores_per_task ))
  done
}
---|
45 | |
---|
# Launch sequence: compute the binding tables of the node, select the entry of
# this local rank, then replace the shell with the target command under numactl.

# A command to launch is mandatory; without it numactl has nothing to execute.
if (( $# == 0 )); then
  echo "Usage: $0 <executable> [args...]" >&2
  exit 1
fi

# Fail early with an explicit message instead of a bare "unbound variable".
# SLURM_NTASKS_PER_NODE is only exported when --ntasks-per-node is given.
: "${SLURM_NTASKS_PER_NODE:?must be set (pass --ntasks-per-node to Slurm)}"
: "${OMP_NUM_THREADS:?must be set to the number of OpenMP threads per task}"
: "${SLURM_LOCALID:?must be set (this script has to be started through srun)}"

create_affinity_numactl "$SLURM_NTASKS_PER_NODE" "$OMP_NUM_THREADS"

# Modulo arithmetic eases some corner use cases.
LOCAL_RANK_INDEX="${SLURM_LOCALID}"
CPU_SET="${AFFINITY_NUMACTL[$((LOCAL_RANK_INDEX % ${#AFFINITY_NUMACTL[@]}))]}"
OMP_PLACES="${OMP_PLACES_ARR[$((LOCAL_RANK_INDEX % ${#OMP_PLACES_ARR[@]}))]}"

# Resolve the host name once instead of forking hostname for every message.
node_name="$(hostname)"

# Only the first local rank prints the node-wide summary.
if [[ $LOCAL_RANK_INDEX = 0 ]]; then
  echo "[${node_name}] Number of used SMT: $num_smt_used"
  echo "[${node_name}] AFFINITY_NUMACTL:" "${AFFINITY_NUMACTL[@]}"
  echo "[${node_name}] OMP_PLACES:" "${OMP_PLACES_ARR[@]}"
fi
export OMP_PLACES
export OMP_PROC_BIND=CLOSE
echo "[${node_name}] Starting local rank ${LOCAL_RANK_INDEX} with: 'numactl --localalloc --physcpubind=${CPU_SET} --' | OMP_PLACES=$OMP_PLACES"
exec numactl --localalloc --physcpubind="${CPU_SET}" -- "$@"
---|