source: BOL/LMDZ_Setup/slurm_set_cpu_binding.sh @ 5493

Last change on this file since 5493 was 5415, checked in by Laurent Fairhead, 5 weeks ago

New version of LMDZ_Setup as rewritten by A. Barral

  • Property svn:executable set to *
File size: 3.0 KB
Line 
#!/bin/bash
# CPU-binding wrapper: computes a per-rank CPU set and launches the given command under numactl.
#
# Adapted by A. Barral (07/2024) from
# https://dci.dci-gitlab.cines.fr/webextranet/porting_optimization/detailed_binding_script.html#porting-optimization-detailed-binding-script
#
# Usage (within exclusive nodes), where ./set_binding.sh stands for this script:
#   srun --cpu-bind=none --mem-bind=none -- ./set_binding.sh <executable> <args>
#
# Note: on Adastra, OMP_PLACES and OMP_PROC_BIND seem to have no effect (probably because the
# default assignment is the same), but they are left in place just in case for other configurations.

# Abort on any failing command (errexit) and on any use of an unset variable (nounset).
set -o errexit -o nounset
7
#######################################
# Build, for every task on the node, the CPU list to give to
# `numactl --physcpubind` and the matching OMP_PLACES string.
#
# Each task gets (num_threads / num_smt_used) consecutive CPU ids per SMT level
# in use; the block for SMT level i is offset by i * total_cpus.
# NOTE(review): this presumes hardware thread i of core c is numbered
# c + i * total_cpus - confirm against the target node's CPU numbering.
#
# Globals written:
#   AFFINITY_NUMACTL  array, one "a-b[,c-d...]" CPU range list per task
#   OMP_PLACES_ARR    array, one "{a}:n:1[,{c}:n:1...]" places list per task
#   num_smt_used      number of hardware threads per core needed to fit
#                     num_tasks * num_threads (left global: read by the caller)
# Arguments:
#   $1 - number of MPI tasks (positive integer)
#   $2 - number of OMP threads per task (positive integer)
# Outputs:
#   "STOP: ..." diagnostics on stderr.
# Exits:
#   Exits the shell with status 1 on invalid arguments, when lscpu does not
#   report the number of threads per core, or when the request does not fit.
#######################################
function create_affinity_numactl() {
    local num_tasks=${1-}  # Number of MPI tasks
    local num_threads=${2-}  # Number of OMP threads per task

    # Reject missing, non-numeric or zero values up front: they would otherwise
    # surface later as an arithmetic error (e.g. a modulo by zero).
    local arg
    for arg in "$num_tasks" "$num_threads"; do
        if [[ ! $arg =~ ^0*[1-9][0-9]*$ ]]; then
            echo "STOP: number of tasks ('$num_tasks') and of OMP threads ('$num_threads') must be positive integers." >&2; exit 1
        fi
    done
    # Force base 10 so that a zero-padded value is not read as octal.
    num_tasks=$(( 10#$num_tasks ))
    num_threads=$(( 10#$num_threads ))

    AFFINITY_NUMACTL=()
    OMP_PLACES_ARR=()
    local start_cpu=0
    local total_strands thread_per_core
    # NOTE(review): hard-coded value; the lscpu-based detection it replaces was
    #   "$(lscpu | grep "^CPU(s):" | awk '{print $2}')"
    # Confirm it matches the nodes this script runs on.
    total_strands=392  # Total physical threads in the system
    # LC_ALL=C keeps the lscpu labels in English whatever the user's locale; only the first match is used.
    thread_per_core="$(LC_ALL=C lscpu | awk '/^Thread\(s\) per core:/ && !seen++ {print $4}')"
    if [[ ! $thread_per_core =~ ^[1-9][0-9]*$ ]]; then
        echo "STOP: could not read 'Thread(s) per core' from lscpu (got '$thread_per_core')." >&2; exit 1
    fi
    local total_cpus=$(( total_strands / thread_per_core ))  # Total physical cores in the system
    num_smt_used=$(( (num_tasks*num_threads+total_cpus-1)/total_cpus ))  # ceil(total_threads_required/total_cpus)

    # Parameter range check
    if (( num_tasks * num_threads > total_strands )); then
        echo "STOP: requesting more CPUs than available on the system!" >&2; exit 1
    fi
    if (( num_threads % num_smt_used != 0 )); then
        echo "STOP: OMP threads number ($num_threads) must be a multiple of $num_smt_used (using $num_smt_used out of $thread_per_core SMT threads per core)." >&2; exit 1
    fi

    # Threads of one task placed on each SMT level (loop invariant).
    local threads_per_smt=$(( num_threads / num_smt_used ))
    local task i_smt range range_omp smt_start_cpu smt_end_cpu
    for (( task=0; task<num_tasks; task++ )); do
        range=""
        range_omp=""
        for (( i_smt=0; i_smt<num_smt_used; i_smt++ )); do
            smt_start_cpu=$(( start_cpu + total_cpus * i_smt ))
            smt_end_cpu=$(( smt_start_cpu + threads_per_smt - 1 ))
            range+=",${smt_start_cpu}-${smt_end_cpu}"
            range_omp+=",{${smt_start_cpu}}:${threads_per_smt}:1"
        done
        # Drop the leading comma left by the accumulation above.
        AFFINITY_NUMACTL+=("${range#,}")
        OMP_PLACES_ARR+=("${range_omp#,}")
        start_cpu=$(( start_cpu + threads_per_smt ))
    done
}
45
# Main: compute the per-task bindings, pick the one for this rank, and replace
# this shell with the wrapped command running under numactl.
#
# Required environment:
#   SLURM_NTASKS_PER_NODE  number of tasks passed to create_affinity_numactl
#   OMP_NUM_THREADS        number of threads per task passed to create_affinity_numactl
#   SLURM_LOCALID          index of this rank, used to select its binding
# Arguments: the executable to launch, followed by its own arguments.

# Fail early with an explicit message rather than a bare "unbound variable".
: "${SLURM_NTASKS_PER_NODE:?STOP: not set (Slurm only sets it when --ntasks-per-node is given)}"
: "${OMP_NUM_THREADS:?STOP: not set (number of OMP threads per task)}"
: "${SLURM_LOCALID:?STOP: not set (this script must be launched through srun)}"
if (( $# == 0 )); then
  echo "STOP: no executable given. Usage: srun --cpu-bind=none --mem-bind=none -- $0 <executable> <args>" >&2
  exit 1
fi

create_affinity_numactl "$SLURM_NTASKS_PER_NODE" "$OMP_NUM_THREADS"

# Modulo arithmetic eases some corner use cases.
LOCAL_RANK_INDEX="${SLURM_LOCALID}"
CPU_SET="${AFFINITY_NUMACTL[$((LOCAL_RANK_INDEX % ${#AFFINITY_NUMACTL[@]}))]}"
OMP_PLACES="${OMP_PLACES_ARR[$((LOCAL_RANK_INDEX % ${#OMP_PLACES_ARR[@]}))]}"

# Resolve the host name once; a failing hostname must not abort the launch.
node_name="$(hostname)" || true

# Only the first local rank prints the full binding tables, to avoid duplicates.
if [[ $LOCAL_RANK_INDEX = 0 ]]; then
  echo "[${node_name}] Number of used SMT: $num_smt_used"
  echo "[${node_name}] AFFINITY_NUMACTL:" "${AFFINITY_NUMACTL[@]}"
  echo "[${node_name}] OMP_PLACES:" "${OMP_PLACES_ARR[@]}"
fi
export OMP_PLACES
export OMP_PROC_BIND=CLOSE
echo "[${node_name}] Starting local rank ${LOCAL_RANK_INDEX} with: 'numactl --localalloc --physcpubind=${CPU_SET} --' | OMP_PLACES=$OMP_PLACES"
exec numactl --localalloc --physcpubind="${CPU_SET}" -- "$@"
Note: See TracBrowser for help on using the repository browser.