#!/bin/bash
# Adapted by A. Barral 07/2024 from https://dci.dci-gitlab.cines.fr/webextranet/porting_optimization/detailed_binding_script.html#porting-optimization-detailed-binding-script
# Used within exclusive nodes as: srun --cpu-bind=none --mem-bind=none -- ./set_binding.sh <executable> <args>
# Note: On Adastra, OMP_PLACES & OMP_PROC_BIND seem to have no effect (the default assignment is probably the same), but I left them in just in case for other configurations.
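# Example invocation (illustrative values, adapt to the job) from an sbatch script on an exclusive node:
#   export OMP_NUM_THREADS=24
#   srun --ntasks-per-node=8 --cpu-bind=none --mem-bind=none -- ./set_binding.sh ./my_app <args>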

set -eu

function create_affinity_numactl() {
    local num_tasks=$1   # Number of MPI tasks
    local num_threads=$2 # Number of OMP threads per task

    AFFINITY_NUMACTL=()
    OMP_PLACES_ARR=()
    local start_cpu=0
    local total_strands thread_per_core
    total_strands=392 # "$(lscpu | grep "^CPU(s):" | awk '{print $2}')" # Total physical threads in the system
    thread_per_core="$(lscpu | grep "^Thread(s) per core:" | awk '{print $4}')"
    local total_cpus=$(( total_strands / thread_per_core )) # Total physical cores in the system
    num_smt_used=$(( (num_tasks*num_threads+total_cpus-1)/total_cpus )) # ceil(total_threads_required/total_cpus)
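    # Illustrative values (assuming 196 cores with SMT 2): 8 tasks x 24 threads -> num_smt_used = ceil(192/196) = 1
    # (only the first hardware thread of each core is used); 8 tasks x 48 threads -> ceil(384/196) = 2 (both strands used).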

    # Parameter range check
    if [[ $(( num_tasks * num_threads )) -gt $(( total_strands )) ]]; then
        echo "STOP: requesting more CPUs than available on the system!"; exit 1
    fi
    if [[ $(( num_threads % num_smt_used )) -ne 0 ]]; then
        echo "STOP: OMP threads number ($num_threads) must be a multiple of $num_smt_used (using $num_smt_used out of $thread_per_core SMT threads per core)."; exit 1
    fi

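    # Build one numactl CPU list and one matching OMP_PLACES specification per MPI task.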
    for (( task=0; task<num_tasks; task++ )); do
        local range=""
        local range_omp=""
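        # A task's threads are spread evenly over the SMT strands it uses: strand i_smt covers CPUs offset by
        # i_smt*total_cpus (the script assumes Linux numbers the second hardware thread of core c as c + total_cpus).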
        for (( i_smt=0; i_smt<num_smt_used; i_smt++ )); do
            local smt_start_cpu=$((start_cpu + total_cpus * i_smt))
            local smt_end_cpu=$((smt_start_cpu + num_threads / num_smt_used - 1))
            range+=",${smt_start_cpu}-${smt_end_cpu}"
            range_omp+=",{${smt_start_cpu}}:$((num_threads / num_smt_used)):1"
        done
        range=$(echo "$range" | cut -c 2-)
        range_omp=$(echo "$range_omp" | cut -c 2-)
        AFFINITY_NUMACTL+=("$range")
        OMP_PLACES_ARR+=("$range_omp")
        start_cpu=$((start_cpu + num_threads / num_smt_used))
    done
}
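# Illustrative result (same assumed values, 8 tasks x 24 threads): AFFINITY_NUMACTL=("0-23" "24-47" ... "168-191")
# and OMP_PLACES_ARR=("{0}:24:1" "{24}:24:1" ... "{168}:24:1"), i.e. 24 single-thread places per task.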

create_affinity_numactl "$SLURM_NTASKS_PER_NODE" "$OMP_NUM_THREADS"

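# srun launches one instance of this wrapper per MPI task; each local rank now selects its own binding.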
# Modulo arithmetic eases some corner use cases (extra local ranks wrap around onto the computed bindings).
LOCAL_RANK_INDEX="${SLURM_LOCALID}"
CPU_SET="${AFFINITY_NUMACTL[$((LOCAL_RANK_INDEX % ${#AFFINITY_NUMACTL[@]}))]}"
OMP_PLACES="${OMP_PLACES_ARR[$((LOCAL_RANK_INDEX % ${#OMP_PLACES_ARR[@]}))]}"

if [[ $LOCAL_RANK_INDEX = 0 ]]; then
    echo "[$(hostname)] Number of used SMT: $num_smt_used"
    echo "[$(hostname)] AFFINITY_NUMACTL:" "${AFFINITY_NUMACTL[@]}"
    echo "[$(hostname)] OMP_PLACES:" "${OMP_PLACES_ARR[@]}"
fi
export OMP_PLACES="$OMP_PLACES"
export OMP_PROC_BIND=CLOSE
echo "[$(hostname)] Starting local rank ${LOCAL_RANK_INDEX} with: 'numactl --localalloc --physcpubind=${CPU_SET} --' | OMP_PLACES=$OMP_PLACES"
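# exec replaces this wrapper shell with numactl (which in turn runs the application), so no extra shell process sits between srun and the binary.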
exec numactl --localalloc --physcpubind="${CPU_SET}" -- "${@}"