From 5f0000ac0c19c97992e26719d1a699afdfb559e7 Mon Sep 17 00:00:00 2001
From: Grigorii Rochev <31252905+Uburro@users.noreply.github.com>
Date: Tue, 17 Dec 2024 16:51:28 +0100
Subject: [PATCH 01/12] MSP-3724: fix Supervisor entrypoint (#269)

* MSP-3724: fix Supervisor entrypoint
---
 images/worker/slurmd.dockerfile         |  8 +-
 images/worker/slurmd_entrypoint.sh      | 91 -----------------------
 images/worker/sshd_entrypoint.sh        | 12 ---
 images/worker/supervisord_entrypoint.sh | 98 +++++++++++++++++++++++++
 internal/render/worker/configmap.go     |  2 +-
 5 files changed, 103 insertions(+), 108 deletions(-)
 delete mode 100644 images/worker/sshd_entrypoint.sh
 create mode 100644 images/worker/supervisord_entrypoint.sh

diff --git a/images/worker/slurmd.dockerfile b/images/worker/slurmd.dockerfile
index 46335354..00494aea 100644
--- a/images/worker/slurmd.dockerfile
+++ b/images/worker/slurmd.dockerfile
@@ -132,9 +132,9 @@ RUN mkdir -p /var/log/slurm/multilog && \
 COPY worker/slurmd_entrypoint.sh /opt/bin/slurm/
 RUN chmod +x /opt/bin/slurm/slurmd_entrypoint.sh
 
-# Copy sshd entrypoint script
-COPY worker/sshd_entrypoint.sh /opt/bin/slurm/
-RUN chmod +x /opt/bin/slurm/sshd_entrypoint.sh
+# Copy supervisord entrypoint script
+COPY worker/supervisord_entrypoint.sh /opt/bin/slurm/
+RUN chmod +x /opt/bin/slurm/supervisord_entrypoint.sh
 
 # Start supervisord that manages both slurmd and sshd as child processes
-ENTRYPOINT ["/usr/bin/supervisord"]
+ENTRYPOINT ["/opt/bin/slurm/supervisord_entrypoint.sh"]

diff --git a/images/worker/slurmd_entrypoint.sh b/images/worker/slurmd_entrypoint.sh
index 1f5e0f59..b329f52c 100755
--- a/images/worker/slurmd_entrypoint.sh
+++ b/images/worker/slurmd_entrypoint.sh
@@ -1,96 +1,5 @@
 #!/bin/bash
 
-set -e # Exit immediately if any command returns a non-zero error code
-
-echo "Starting slurmd entrypoint script"
-if [ -n "${CGROUP_V2}" ]; then
-    CGROUP_PATH=$(cat /proc/self/cgroup | sed 's/^0:://')
-
-    if [ -n "${CGROUP_PATH}" ]; then
-        echo "cgroup v2 detected, creating cgroup for ${CGROUP_PATH}"
-        mkdir -p /sys/fs/cgroup/${CGROUP_PATH}/../system.slice
-    else
-        echo "cgroup v2 detected, but cgroup path is empty"
-        exit 1
-    fi
-fi
-
-echo "Link users from jail"
-ln -s /mnt/jail/etc/passwd /etc/passwd
-ln -s /mnt/jail/etc/group /etc/group
-ln -s /mnt/jail/etc/shadow /etc/shadow
-ln -s /mnt/jail/etc/gshadow /etc/gshadow
-chown -h 0:42 /etc/{shadow,gshadow}
-
-echo "Link home from jail because slurmd uses it"
-ln -s /mnt/jail/home /home
-
-echo "Bind-mount slurm configs from K8S config map"
-for file in /mnt/slurm-configs/*; do
-    filename=$(basename "$file")
-    touch "/etc/slurm/$filename" && mount --bind "$file" "/etc/slurm/$filename"
-done
-
-echo "Make ulimits as big as possible"
-set_ulimit() {
-    local limit_option=$1
-    local limit_value=$2
-    ulimit $limit_option $limit_value || { echo "ulimit $limit_option: exit code: $?"; }
-}
-set_ulimit -HSR unlimited # (-R) Max real-time non-blocking time
-set_ulimit -HSc unlimited # (-c) Max core file size
-set_ulimit -HSd unlimited # (-d) Max "data" segment size
-set_ulimit -HSe unlimited # (-e) Max scheduling priority
-set_ulimit -HSf unlimited # (-f) Max file size
-set_ulimit -HSi unlimited # (-i) Max number of pending signals
-set_ulimit -HSl unlimited # (-l) Max locked memory size (is necessary for Infiniband RDMA to work)
-set_ulimit -HSm unlimited # (-m) Max physical memory usage
-set_ulimit -HSn 1048576   # (-n) Max number of open files
-# READ-ONLY                # (-p) Max pipe size
-set_ulimit -HSq unlimited # (-q) Max POSIX message queue size
-set_ulimit -HSr unlimited # (-r) Max real-time priority
-set_ulimit -HSs unlimited # (-s) Max stack size
-set_ulimit -HSt unlimited # (-t) Max CPU time
-set_ulimit -HSu unlimited # (-u) Max number of user processes
-set_ulimit -HSv unlimited # (-v) Max virtual memory size
-set_ulimit -HSx unlimited # (-x) Max number of file locks
-
-echo "Apply sysctl limits from /etc/sysctl.conf"
-sysctl -p
-
-echo "Update linker cache"
-ldconfig
-
-echo "Complement jail rootfs"
-/opt/bin/slurm/complement_jail.sh -j /mnt/jail -u /mnt/jail.upper -w
-
-# TODO: Since 1.29 kubernetes supports native sidecar containers. We can remove it in feature releases
-echo "Waiting until munge is started"
-while [ ! -S "/run/munge/munge.socket.2" ]; do sleep 2; done
-
-GRES=""
-if [ "$SLURM_CLUSTER_TYPE" = "gpu" ]; then
-    echo "Slurm cluster type is - $SLURM_CLUSTER_TYPE Detect available GPUs"
-    # The following command converts the nvidia-smi output into the Gres GPU string expected by Slurm.
-    # For example, if "nvidia-smi --list-gpus" shows this:
-    # GPU 0: NVIDIA A100-SXM4-80GB (UUID: <...>)
-    # GPU 1: NVIDIA A100-SXM4-80GB (UUID: <...>)
-    # GPU 2: NVIDIA V100-SXM4-16GB (UUID: <...>)
-    # the GRES variable will be equal to "gpu:nvidia_a100-sxm4-80gb:2,gpu:nvidia_v100-sxm2-16gb:1".
-    # See Slurm docs: https://slurm.schedmd.com/gres.html#AutoDetect
-    GRES="$(nvidia-smi --query-gpu=name --format=csv,noheader | sed -e 's/ /_/g' -e 's/.*/\L&/' | sort | uniq -c | awk '{print "gpu:" $2 ":" $1}' | paste -sd ',' -)"
-    echo "Detected GRES is $GRES"
-
-    echo "Create NVML symlink with the name expected by Slurm"
-    pushd /usr/lib/x86_64-linux-gnu
-    ln -s libnvidia-ml.so.1 libnvidia-ml.so
-    popd
-else
-    echo "Skipping GPU detection"
-fi
-
-# Hack with logs: multilog will write log in stdout and in log file, and rotate log file
-# # s100000000 (bytes) - 100MB, n5 - 5 files
 echo "Start slurmd daemon"
 exec /usr/sbin/slurmd \
     -D \

diff --git a/images/worker/sshd_entrypoint.sh b/images/worker/sshd_entrypoint.sh
deleted file mode 100644
index f22691b2..00000000
--- a/images/worker/sshd_entrypoint.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-set -e # Exit immediately if any command returns a non-zero error code
-
-echo "Create privilege separation directory /var/run/sshd"
-mkdir -p /var/run/sshd
-
-echo "Waiting until munge started"
-while [ ! -S "/run/munge/munge.socket.2" ]; do sleep 2; done
-
-echo "Start sshd daemon"
-/usr/sbin/sshd -D -e -f /mnt/ssh-configs/sshd_config

diff --git a/images/worker/supervisord_entrypoint.sh b/images/worker/supervisord_entrypoint.sh
new file mode 100644
index 00000000..a594e844
--- /dev/null
+++ b/images/worker/supervisord_entrypoint.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+set -e # Exit immediately if any command returns a non-zero error code
+
+echo "Starting slurmd entrypoint script"
+if [ -n "${CGROUP_V2}" ]; then
+    CGROUP_PATH=$(cat /proc/self/cgroup | sed 's/^0:://')
+
+    if [ -n "${CGROUP_PATH}" ]; then
+        echo "cgroup v2 detected, creating cgroup for ${CGROUP_PATH}"
+        mkdir -p /sys/fs/cgroup/${CGROUP_PATH}/../system.slice
+    else
+        echo "cgroup v2 detected, but cgroup path is empty"
+        exit 1
+    fi
+fi
+
+echo "Link users from jail"
+ln -s /mnt/jail/etc/passwd /etc/passwd
+ln -s /mnt/jail/etc/group /etc/group
+ln -s /mnt/jail/etc/shadow /etc/shadow
+ln -s /mnt/jail/etc/gshadow /etc/gshadow
+chown -h 0:42 /etc/{shadow,gshadow}
+
+echo "Link home from jail because slurmd uses it"
+ln -s /mnt/jail/home /home
+
+echo "Bind-mount slurm configs from K8S config map"
+for file in /mnt/slurm-configs/*; do
+    filename=$(basename "$file")
+    touch "/etc/slurm/$filename" && mount --bind "$file" "/etc/slurm/$filename"
+done
+
+echo "Make ulimits as big as possible"
+set_ulimit() {
+    local limit_option=$1
+    local limit_value=$2
+    ulimit $limit_option $limit_value || { echo "ulimit $limit_option: exit code: $?"; }
+}
+set_ulimit -HSR unlimited # (-R) Max real-time non-blocking time
+set_ulimit -HSc unlimited # (-c) Max core file size
+set_ulimit -HSd unlimited # (-d) Max "data" segment size
+set_ulimit -HSe unlimited # (-e) Max scheduling priority
+set_ulimit -HSf unlimited # (-f) Max file size
+set_ulimit -HSi unlimited # (-i) Max number of pending signals
+set_ulimit -HSl unlimited # (-l) Max locked memory size (is necessary for Infiniband RDMA to work)
+set_ulimit -HSm unlimited # (-m) Max physical memory usage
+set_ulimit -HSn 1048576   # (-n) Max number of open files
+# READ-ONLY                # (-p) Max pipe size
+set_ulimit -HSq unlimited # (-q) Max POSIX message queue size
+set_ulimit -HSr unlimited # (-r) Max real-time priority
+set_ulimit -HSs unlimited # (-s) Max stack size
+set_ulimit -HSt unlimited # (-t) Max CPU time
+set_ulimit -HSu unlimited # (-u) Max number of user processes
+set_ulimit -HSv unlimited # (-v) Max virtual memory size
+set_ulimit -HSx unlimited # (-x) Max number of file locks
+
+echo "Apply sysctl limits from /etc/sysctl.conf"
+sysctl -p
+
+echo "Update linker cache"
+ldconfig
+
+echo "Complement jail rootfs"
+/opt/bin/slurm/complement_jail.sh -j /mnt/jail -u /mnt/jail.upper -w
+
+echo "Create privilege separation directory /var/run/sshd"
+mkdir -p /var/run/sshd
+
+# TODO: Since 1.29 kubernetes supports native sidecar containers. We can remove it in future releases
+echo "Waiting until munge is started"
+while [ ! -S "/run/munge/munge.socket.2" ]; do sleep 2; done
+
+GRES=""
+if [ "$SLURM_CLUSTER_TYPE" = "gpu" ]; then
+    echo "Slurm cluster type is $SLURM_CLUSTER_TYPE. Detecting available GPUs"
+    # The following command converts the nvidia-smi output into the Gres GPU string expected by Slurm.
+    # For example, if "nvidia-smi --list-gpus" shows this:
+    # GPU 0: NVIDIA A100-SXM4-80GB (UUID: <...>)
+    # GPU 1: NVIDIA A100-SXM4-80GB (UUID: <...>)
+    # GPU 2: NVIDIA V100-SXM2-16GB (UUID: <...>)
+    # the GRES variable will be equal to "gpu:nvidia_a100-sxm4-80gb:2,gpu:nvidia_v100-sxm2-16gb:1".
+    # See Slurm docs: https://slurm.schedmd.com/gres.html#AutoDetect
+    export GRES="$(nvidia-smi --query-gpu=name --format=csv,noheader | sed -e 's/ /_/g' -e 's/.*/\L&/' | sort | uniq -c | awk '{print "gpu:" $2 ":" $1}' | paste -sd ',' -)"
+
+    echo "Detected GRES is $GRES"
+
+    echo "Create NVML symlink with the name expected by Slurm"
+    pushd /usr/lib/x86_64-linux-gnu
+    ln -s libnvidia-ml.so.1 libnvidia-ml.so
+    popd
+else
+    echo "Skipping GPU detection"
+fi
+
+# Hack with logs: multilog writes the log both to stdout and to a log file, and rotates the log file
+echo "Start supervisord daemon"
+/usr/bin/supervisord

diff --git a/internal/render/worker/configmap.go b/internal/render/worker/configmap.go
index d353c872..92f548fb 100644
--- a/internal/render/worker/configmap.go
+++ b/internal/render/worker/configmap.go
@@ -207,7 +207,7 @@ func generateSupervisordConfig() renderutils.ConfigFile {
 	res.AddLine("stderr_logfile=/dev/fd/2")
 	res.AddLine("stderr_logfile_maxbytes=0")
 	res.AddLine("redirect_stderr=true")
-	res.AddLine("command=/opt/bin/slurm/sshd_entrypoint.sh")
+	res.AddLine("command=/usr/sbin/sshd -D -e -f /mnt/ssh-configs/sshd_config")
 	res.AddLine("autostart=true")
 	res.AddLine("autorestart=true")
 	res.AddLine("startretries=5")

From f51a99256fe2a715af5401246a2696456d903d81 Mon Sep 17 00:00:00 2001
From: Uburro
Date: Tue, 17 Dec 2024 17:41:51 +0100
Subject: [PATCH 02/12] MSP-3645: add OOMKillStep

---
 api/v1/slurmcluster_types.go                        | 2 +-
 config/crd/bases/slurm.nebius.ai_slurmclusters.yaml | 2 +-
 helm/soperator-crds/templates/slurmcluster-crd.yaml | 2 +-
 helm/soperator/crds/slurmcluster-crd.yaml           | 2 +-
 images/worker/supervisord_entrypoint.sh             | 1 +
 5 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/api/v1/slurmcluster_types.go b/api/v1/slurmcluster_types.go
index 979f5a71..50728231 100644
--- a/api/v1/slurmcluster_types.go
+++ b/api/v1/slurmcluster_types.go
@@ -104,7 +104,7 @@ type SlurmConfig struct {
 	// +kubebuilder:validation:Pattern="^((Accrue|Agent|AuditRPCs|Backfill|BackfillMap|BurstBuffer|Cgroup|ConMgr|CPU_Bind|CpuFrequency|Data|DBD_Agent|Dependency|Elasticsearch|Energy|Federation|FrontEnd|Gres|Hetjob|Gang|GLOB_SILENCE|JobAccountGather|JobComp|JobContainer|License|Network|NetworkRaw|NodeFeatures|NO_CONF_HASH|Power|Priority|Profile|Protocol|Reservation|Route|Script|SelectType|Steps|Switch|TLS|TraceJobs|Triggers)(,)?)+$"
 	DebugFlags string `json:"debugFlags,omitempty"`
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="Verbose"
+	// +kubebuilder:default="Verbose,OOMKillStep"
 	// +kubebuilder:validation:Pattern="^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$"
 	TaskPluginParam string `json:"taskPluginParam,omitempty"`
 }

diff --git a/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml b/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
index 4dfbdcca..693b76f5 100644
--- a/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
+++ b/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
@@ -1497,7 +1497,7 @@ spec:
                 format: int32
                 type: integer
               taskPluginParam:
-                default: Verbose
+                default: Verbose,OOMKillStep
                 pattern: ^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$
                 type: string
             type: object

diff --git a/helm/soperator-crds/templates/slurmcluster-crd.yaml b/helm/soperator-crds/templates/slurmcluster-crd.yaml
index 2381a49a..1b29b7f3 100644
--- a/helm/soperator-crds/templates/slurmcluster-crd.yaml
+++ b/helm/soperator-crds/templates/slurmcluster-crd.yaml
@@ -1496,7 +1496,7 @@ spec:
                 format: int32
                 type: integer
               taskPluginParam:
-                default: Verbose
+                default: Verbose,OOMKillStep
                 pattern: ^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$
                 type: string
             type: object

diff --git a/helm/soperator/crds/slurmcluster-crd.yaml b/helm/soperator/crds/slurmcluster-crd.yaml
index 2381a49a..1b29b7f3 100644
--- a/helm/soperator/crds/slurmcluster-crd.yaml
+++ b/helm/soperator/crds/slurmcluster-crd.yaml
@@ -1496,7 +1496,7 @@ spec:
                 format: int32
                 type: integer
              taskPluginParam:
-                default: Verbose
+                default: Verbose,OOMKillStep
                 pattern: ^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$
                 type: string
             type: object

diff --git a/images/worker/supervisord_entrypoint.sh b/images/worker/supervisord_entrypoint.sh
index a594e844..b012bba5 100644
--- a/images/worker/supervisord_entrypoint.sh
+++ b/images/worker/supervisord_entrypoint.sh
@@ -9,6 +9,7 @@ if [ -n "${CGROUP_V2}" ]; then
     if [ -n "${CGROUP_PATH}" ]; then
         echo "cgroup v2 detected, creating cgroup for ${CGROUP_PATH}"
         mkdir -p /sys/fs/cgroup/${CGROUP_PATH}/../system.slice
+        echo "1" > /sys/fs/cgroup/${CGROUP_PATH}/../system.slice/memory.oom.group
     else
         echo "cgroup v2 detected, but cgroup path is empty"
         exit 1

From b375c943cbd496d01cd92ff5dc8ef614ae15a78e Mon Sep 17 00:00:00 2001
From: Uburro
Date: Tue, 17 Dec 2024 18:57:20 +0100
Subject: [PATCH 03/12] MSP-3645: add task and job params into slurm.conf

---
 api/v1/slurmcluster_types.go                        | 21 +++++++++++++-
 .../bases/slurm.nebius.ai_slurmclusters.yaml        | 29 ++++++++++++++++++-
 .../templates/slurm-cluster-cr.yaml                 |  4 +++
 helm/slurm-cluster/values.yaml                      |  9 ++++++
 .../templates/slurmcluster-crd.yaml                 | 29 ++++++++++++++++++-
 helm/soperator/crds/slurmcluster-crd.yaml           | 29 ++++++++++++++++++-
 images/worker/supervisord_entrypoint.sh             |  3 +-
 internal/render/common/configmap.go                 |  6 ----
 8 files changed, 119 insertions(+), 11 deletions(-)

diff --git a/api/v1/slurmcluster_types.go b/api/v1/slurmcluster_types.go
index 50728231..6888700c 100644
--- a/api/v1/slurmcluster_types.go
+++ b/api/v1/slurmcluster_types.go
@@ -77,6 +77,7 @@ type SlurmClusterSpec struct {
 	// SlurmConfig represents the Slurm configuration in slurm.conf. Not all options are supported.
 	//
 	// +kubebuilder:validation:Optional
+	// +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", taskPluginParam: "Verbose", taskPlugin: "task/cgroup,task/affinity", maxJobCount: 10000, minJobAge: 86400}
 	SlurmConfig SlurmConfig `json:"slurmConfig,omitempty"`
 }
@@ -103,10 +104,28 @@ type SlurmConfig struct {
 	// +kubebuilder:default="Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs"
 	// +kubebuilder:validation:Pattern="^((Accrue|Agent|AuditRPCs|Backfill|BackfillMap|BurstBuffer|Cgroup|ConMgr|CPU_Bind|CpuFrequency|Data|DBD_Agent|Dependency|Elasticsearch|Energy|Federation|FrontEnd|Gres|Hetjob|Gang|GLOB_SILENCE|JobAccountGather|JobComp|JobContainer|License|Network|NetworkRaw|NodeFeatures|NO_CONF_HASH|Power|Priority|Profile|Protocol|Reservation|Route|Script|SelectType|Steps|Switch|TLS|TraceJobs|Triggers)(,)?)+$"
 	DebugFlags string `json:"debugFlags,omitempty"`
+	// Identifies the type of task launch plugin (e.g. pinning tasks to specific processors)
+	//
+	// +kubebuilder:validation:Pattern="^((task/affinity|task/cgroup|task/none)(,)?)+$"
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="task/cgroup,task/affinity"
+	TaskPlugin string `json:"taskPlugin,omitempty"`
+	// Additional parameters for the task plugin
+	//
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="Verbose,OOMKillStep"
+	// +kubebuilder:default="Verbose"
 	// +kubebuilder:validation:Pattern="^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$"
 	TaskPluginParam string `json:"taskPluginParam,omitempty"`
+	// Keep N last jobs in controller memory
+	//
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default=10000
+	MaxJobCount int32 `json:"maxJobCount,omitempty"`
+	// Don't remove jobs from controller memory after some time
+	//
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default=86400
+	MinJobAge int32 `json:"minJobAge,omitempty"`
 }
 
 type PartitionConfiguration struct {

diff --git a/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml b/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
index 693b76f5..91db34d5 100644
--- a/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
+++ b/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
@@ -1470,6 +1470,15 @@ spec:
                 type: string
             type: object
           slurmConfig:
+            default:
+              completeWait: 5
+              debugFlags: Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs
+              defCpuPerGPU: 16
+              defMemPerNode: 1228800
+              maxJobCount: 10000
+              minJobAge: 86400
+              taskPlugin: task/cgroup,task/affinity
+              taskPluginParam: Verbose
             description: SlurmConfig represents the Slurm configuration in slurm.conf.
               Not all options are supported.
             properties:
@@ -1496,8 +1505,26 @@ spec:
                   node in mebibytes.
                 format: int32
                 type: integer
+              maxJobCount:
+                default: 10000
+                description: Keep N last jobs in controller memory
+                format: int32
+                type: integer
+              minJobAge:
+                default: 86400
+                description: Don't remove jobs from controller memory after some
+                  time
+                format: int32
+                type: integer
+              taskPlugin:
+                default: task/cgroup,task/affinity
+                description: Identifies the type of task launch plugin (e.g. pinning
+                  tasks to specific processors)
+                pattern: ^((task/affinity|task/cgroup|task/none)(,)?)+$
+                type: string
               taskPluginParam:
-                default: Verbose,OOMKillStep
+                default: Verbose
+                description: Additional parameters for the task plugin
                 pattern: ^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$
                 type: string
             type: object

diff --git a/helm/slurm-cluster/templates/slurm-cluster-cr.yaml b/helm/slurm-cluster/templates/slurm-cluster-cr.yaml
index 22b874ee..cd4569af 100644
--- a/helm/slurm-cluster/templates/slurm-cluster-cr.yaml
+++ b/helm/slurm-cluster/templates/slurm-cluster-cr.yaml
@@ -13,6 +13,10 @@ metadata:
     {{- . | toYaml | nindent 4 }}
   {{- end }}
 spec:
+  {{- if .Values.slurmConfig }}
+  slurmConfig:
+    {{- toYaml .Values.slurmConfig | nindent 4 }}
+  {{- end }}
   crVersion: {{ .Chart.Version }}
   pause: {{ .Values.pause }}
   clusterType: {{ .Values.clusterType }}

diff --git a/helm/slurm-cluster/values.yaml b/helm/slurm-cluster/values.yaml
index e89fdfbc..38e6a04e 100644
--- a/helm/slurm-cluster/values.yaml
+++ b/helm/slurm-cluster/values.yaml
@@ -116,6 +116,15 @@ periodicChecks:
     k8sNodeFilterName: "no-gpu"
 imagePullPolicy: "IfNotPresent"
 appArmorProfile: "unconfined"
+slurmConfig: {}
+  # defMemPerNode: 1228800
+  # defCpuPerGPU: 16
+  # completeWait: 5
+  # debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs"
+  # taskPluginParam: "Verbose"
+  # taskPlugin: "task/cgroup,task/affinity"
+  # maxJobCount: 10000
+  # minJobAge: 86400
 slurmNodes:
   accounting:
     enabled: false

diff --git a/helm/soperator-crds/templates/slurmcluster-crd.yaml b/helm/soperator-crds/templates/slurmcluster-crd.yaml
index 1b29b7f3..fcc7d516 100644
--- a/helm/soperator-crds/templates/slurmcluster-crd.yaml
+++ b/helm/soperator-crds/templates/slurmcluster-crd.yaml
@@ -1469,6 +1469,15 @@ spec:
                 type: string
             type: object
           slurmConfig:
+            default:
+              completeWait: 5
+              debugFlags: Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs
+              defCpuPerGPU: 16
+              defMemPerNode: 1228800
+              maxJobCount: 10000
+              minJobAge: 86400
+              taskPlugin: task/cgroup,task/affinity
+              taskPluginParam: Verbose
             description: SlurmConfig represents the Slurm configuration in slurm.conf.
               Not all options are supported.
             properties:
@@ -1495,8 +1504,26 @@ spec:
                   node in mebibytes.
                 format: int32
                 type: integer
+              maxJobCount:
+                default: 10000
+                description: Keep N last jobs in controller memory
+                format: int32
+                type: integer
+              minJobAge:
+                default: 86400
+                description: Don't remove jobs from controller memory after some
+                  time
+                format: int32
+                type: integer
+              taskPlugin:
+                default: task/cgroup,task/affinity
+                description: Identifies the type of task launch plugin (e.g. pinning
+                  tasks to specific processors)
+                pattern: ^((task/affinity|task/cgroup|task/none)(,)?)+$
+                type: string
               taskPluginParam:
-                default: Verbose,OOMKillStep
+                default: Verbose
+                description: Additional parameters for the task plugin
                 pattern: ^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$
                 type: string
             type: object

diff --git a/helm/soperator/crds/slurmcluster-crd.yaml b/helm/soperator/crds/slurmcluster-crd.yaml
index 1b29b7f3..fcc7d516 100644
--- a/helm/soperator/crds/slurmcluster-crd.yaml
+++ b/helm/soperator/crds/slurmcluster-crd.yaml
@@ -1469,6 +1469,15 @@ spec:
                 type: string
             type: object
           slurmConfig:
+            default:
+              completeWait: 5
+              debugFlags: Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs
+              defCpuPerGPU: 16
+              defMemPerNode: 1228800
+              maxJobCount: 10000
+              minJobAge: 86400
+              taskPlugin: task/cgroup,task/affinity
+              taskPluginParam: Verbose
             description: SlurmConfig represents the Slurm configuration in slurm.conf.
               Not all options are supported.
             properties:
@@ -1495,8 +1504,26 @@ spec:
                   node in mebibytes.
                 format: int32
                 type: integer
+              maxJobCount:
+                default: 10000
+                description: Keep N last jobs in controller memory
+                format: int32
+                type: integer
+              minJobAge:
+                default: 86400
+                description: Don't remove jobs from controller memory after some
+                  time
+                format: int32
+                type: integer
+              taskPlugin:
+                default: task/cgroup,task/affinity
+                description: Identifies the type of task launch plugin (e.g. pinning
+                  tasks to specific processors)
+                pattern: ^((task/affinity|task/cgroup|task/none)(,)?)+$
+                type: string
               taskPluginParam:
-                default: Verbose,OOMKillStep
+                default: Verbose
+                description: Additional parameters for the task plugin
                 pattern: ^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$
                 type: string
             type: object

diff --git a/images/worker/supervisord_entrypoint.sh b/images/worker/supervisord_entrypoint.sh
index b012bba5..96108405 100644
--- a/images/worker/supervisord_entrypoint.sh
+++ b/images/worker/supervisord_entrypoint.sh
@@ -9,7 +9,8 @@ if [ -n "${CGROUP_V2}" ]; then
     if [ -n "${CGROUP_PATH}" ]; then
         echo "cgroup v2 detected, creating cgroup for ${CGROUP_PATH}"
         mkdir -p /sys/fs/cgroup/${CGROUP_PATH}/../system.slice
-        echo "1" > /sys/fs/cgroup/${CGROUP_PATH}/../system.slice/memory.oom.group
+        # TODO: uncomment this line once 24.11 has been tested. It enables OOMKillStep for taskPluginParam
+        # echo "1" > /sys/fs/cgroup/${CGROUP_PATH}/../system.slice/memory.oom.group
     else
         echo "cgroup v2 detected, but cgroup path is empty"
         exit 1

diff --git a/internal/render/common/configmap.go b/internal/render/common/configmap.go
index 07af9a67..438be14c 100644
--- a/internal/render/common/configmap.go
+++ b/internal/render/common/configmap.go
@@ -90,17 +90,11 @@ func generateSlurmConfig(cluster *values.SlurmCluster) renderutils.ConfigFile {
 	res.AddComment("")
 	res.AddProperty("StateSaveLocation", naming.BuildVolumeMountSpoolPath(consts.SlurmctldName))
 	res.AddComment("")
-	res.AddProperty("TaskPlugin", "task/cgroup,task/affinity")
-	res.AddComment("")
 	res.AddProperty("CliFilterPlugins", "cli_filter/user_defaults")
 	res.AddComment("")
 	res.AddProperty("LaunchParameters", "use_interactive_step")
 	res.AddComment("Scrontab")
 	res.AddProperty("ScronParameters", "enable,explicit_scancel")
-	res.AddComment("")
-	res.AddProperty("MaxJobCount", 1000) // Keep 1000 last jobs in controller memory
-	res.AddProperty("MinJobAge", 86400)  // Don't remove jobs from controller memory after some time
-	res.AddComment("")
 	res.AddProperty("PropagateResourceLimits", "NONE") // Don't propagate ulimits from the login node by default
 	res.AddComment("")
 	res.AddComment("HEALTH CHECKS")

From 673d5e8973a685f85be053ce6d8f9f4fc351ab06 Mon Sep 17 00:00:00 2001
From: Uburro
Date: Tue, 17 Dec 2024 19:10:43 +0100
Subject: [PATCH 04/12] MSP-3645: rollback TaskPlugin default

---
 api/v1/slurmcluster_types.go        | 8 +-------
 helm/slurm-cluster/values.yaml      | 1 -
 internal/render/common/configmap.go | 2 ++
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/api/v1/slurmcluster_types.go b/api/v1/slurmcluster_types.go
index 6888700c..852f0bf5 100644
--- a/api/v1/slurmcluster_types.go
+++ b/api/v1/slurmcluster_types.go
@@ -77,7 +77,7 @@ type SlurmClusterSpec struct {
 	// SlurmConfig represents the Slurm configuration in slurm.conf. Not all options are supported.
 	//
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", taskPluginParam: "Verbose", taskPlugin: "task/cgroup,task/affinity", maxJobCount: 10000, minJobAge: 86400}
+	// +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", taskPluginParam: "Verbose", maxJobCount: 10000, minJobAge: 86400}
 	SlurmConfig SlurmConfig `json:"slurmConfig,omitempty"`
 }
@@ -104,12 +104,6 @@ type SlurmConfig struct {
 	// +kubebuilder:default="Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs"
 	// +kubebuilder:validation:Pattern="^((Accrue|Agent|AuditRPCs|Backfill|BackfillMap|BurstBuffer|Cgroup|ConMgr|CPU_Bind|CpuFrequency|Data|DBD_Agent|Dependency|Elasticsearch|Energy|Federation|FrontEnd|Gres|Hetjob|Gang|GLOB_SILENCE|JobAccountGather|JobComp|JobContainer|License|Network|NetworkRaw|NodeFeatures|NO_CONF_HASH|Power|Priority|Profile|Protocol|Reservation|Route|Script|SelectType|Steps|Switch|TLS|TraceJobs|Triggers)(,)?)+$"
 	DebugFlags string `json:"debugFlags,omitempty"`
-	// Identifies the type of task launch plugin (e.g. pinning tasks to specific processors)
-	//
-	// +kubebuilder:validation:Pattern="^((task/affinity|task/cgroup|task/none)(,)?)+$"
-	// +kubebuilder:validation:Optional
-	// +kubebuilder:default="task/cgroup,task/affinity"
-	TaskPlugin string `json:"taskPlugin,omitempty"`
 	// Additional parameters for the task plugin
 	//
 	// +kubebuilder:validation:Optional

diff --git a/helm/slurm-cluster/values.yaml b/helm/slurm-cluster/values.yaml
index 38e6a04e..02736886 100644
--- a/helm/slurm-cluster/values.yaml
+++ b/helm/slurm-cluster/values.yaml
@@ -122,7 +122,6 @@ slurmConfig: {}
   # completeWait: 5
   # debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs"
   # taskPluginParam: "Verbose"
-  # taskPlugin: "task/cgroup,task/affinity"
   # maxJobCount: 10000
   # minJobAge: 86400
 slurmNodes:

diff --git a/internal/render/common/configmap.go b/internal/render/common/configmap.go
index 438be14c..3655edab 100644
--- a/internal/render/common/configmap.go
+++ b/internal/render/common/configmap.go
@@ -90,6 +90,8 @@ func generateSlurmConfig(cluster *values.SlurmCluster) renderutils.ConfigFile {
 	res.AddComment("")
 	res.AddProperty("StateSaveLocation", naming.BuildVolumeMountSpoolPath(consts.SlurmctldName))
 	res.AddComment("")
+	res.AddProperty("TaskPlugin", "task/cgroup,task/affinity")
+	res.AddComment("")
 	res.AddProperty("CliFilterPlugins", "cli_filter/user_defaults")
 	res.AddComment("")
 	res.AddProperty("LaunchParameters", "use_interactive_step")

From 0f410d9c1e72b60a8dc70132f139042a6fe0a78f Mon Sep 17 00:00:00 2001
From: Uburro
Date: Wed, 18 Dec 2024 12:51:11 +0100
Subject: [PATCH 05/12] NOTIC: add max startretries

---
 internal/render/worker/configmap.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/internal/render/worker/configmap.go b/internal/render/worker/configmap.go
index 92f548fb..b4f4dae9 100644
--- a/internal/render/worker/configmap.go
+++ b/internal/render/worker/configmap.go
@@ -194,7 +194,7 @@ func generateSupervisordConfig() renderutils.ConfigFile {
 	res.AddLine("command=/opt/bin/slurm/slurmd_entrypoint.sh")
 	res.AddLine("autostart=true")
 	res.AddLine("autorestart=true")
-	res.AddLine("startretries=5")
+	res.AddLine("startsecs=0")
 	res.AddLine("stopasgroup=true ; Send SIGTERM to all child processes of supervisord")
 	res.AddLine("killasgroup=true ; Send SIGKILL to all child processes of supervisord")
 	res.AddLine("stopsignal=SIGTERM ; Signal to send to the program to stop it")
@@ -210,7 +210,7 @@ func generateSupervisordConfig() renderutils.ConfigFile {
 	res.AddLine("command=/usr/sbin/sshd -D -e -f /mnt/ssh-configs/sshd_config")
 	res.AddLine("autostart=true")
 	res.AddLine("autorestart=true")
-	res.AddLine("startretries=5")
+	res.AddLine("startsecs=0")
 	res.AddLine("stopasgroup=true ; Send SIGTERM to all child processes of supervisord")
 	res.AddLine("killasgroup=true ; Send SIGKILL to all child processes of supervisord")
 	res.AddLine("stopsignal=SIGTERM ; Signal to send to the program to stop it")

From 47b6963cdc917620995b56b471886e0d9ac25027 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 19 Dec 2024 15:31:07 +0000
Subject: [PATCH 06/12] build(deps): bump google.golang.org/grpc in
 /images/jail/gpubench

Bumps [google.golang.org/grpc](https://github.com/grpc/grpc-go) from 1.69.0 to 1.69.2.
- [Release notes](https://github.com/grpc/grpc-go/releases)
- [Commits](https://github.com/grpc/grpc-go/compare/v1.69.0...v1.69.2)

---
updated-dependencies:
- dependency-name: google.golang.org/grpc
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
---
 images/jail/gpubench/go.mod | 2 +-
 images/jail/gpubench/go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/images/jail/gpubench/go.mod b/images/jail/gpubench/go.mod
index 127a3961..651e3391 100644
--- a/images/jail/gpubench/go.mod
+++ b/images/jail/gpubench/go.mod
@@ -13,7 +13,7 @@ require (
 	go.opentelemetry.io/otel/metric v1.33.0
 	go.opentelemetry.io/otel/sdk v1.33.0
 	go.opentelemetry.io/otel/sdk/metric v1.33.0
-	google.golang.org/grpc v1.69.0
+	google.golang.org/grpc v1.69.2
 	k8s.io/api v0.31.4
 	k8s.io/apimachinery v0.31.4
 	k8s.io/client-go v0.31.3

diff --git a/images/jail/gpubench/go.sum b/images/jail/gpubench/go.sum
index b26ce31e..9ab08ac6 100644
--- a/images/jail/gpubench/go.sum
+++ b/images/jail/gpubench/go.sum
@@ -156,8 +156,8 @@ google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 h1:
 google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08=
 google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 h1:8ZmaLZE4XWrtU3MyClkYqqtl6Oegr3235h7jxsDyqCY=
 google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
-google.golang.org/grpc v1.69.0 h1:quSiOM1GJPmPH5XtU+BCoVXcDVJJAzNcoyfC2cCjGkI=
-google.golang.org/grpc v1.69.0/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4=
+google.golang.org/grpc v1.69.2 h1:U3S9QEtbXC0bYNvRtcoklF3xGtLViumSYxWykJS+7AU=
+google.golang.org/grpc v1.69.2/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4=
 google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
 google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

From 316afd479308a9142292b6c97a31fed0b9ba6160 Mon Sep 17 00:00:00 2001
From: Pavel Sofronii
Date: Fri, 20 Dec 2024 15:30:43 +0100
Subject: [PATCH 07/12] Fix daemonset labels

---
 internal/consts/component.go        | 21 +++++++++++----------
 internal/render/worker/daemonset.go | 16 +++++-----------
 2 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/internal/consts/component.go b/internal/consts/component.go
index 4a3f69e6..c7250470 100644
--- a/internal/consts/component.go
+++ b/internal/consts/component.go
@@ -19,14 +19,15 @@ func (b baseComponentType) String() string {
 }
 
 var (
-	ComponentTypeCommon          ComponentType = baseComponentType{"common"}
-	ComponentTypeController      ComponentType = baseComponentType{"controller"}
-	ComponentTypeAccounting      ComponentType = baseComponentType{"accounting"}
-	ComponentTypeREST            ComponentType = baseComponentType{"rest"}
-	ComponentTypeWorker          ComponentType = baseComponentType{"worker"}
-	ComponentTypeLogin           ComponentType = baseComponentType{"login"}
-	ComponentTypeBenchmark       ComponentType = baseComponentType{"nccl-benchmark"}
-	ComponentTypePopulateJail    ComponentType = baseComponentType{"populate-jail"}
-	ComponentTypeExporter        ComponentType = baseComponentType{"exporter"}
-	ComponentTypeMariaDbOperator ComponentType = baseComponentType{"mariadb-operator"}
+	ComponentTypeCommon              ComponentType = baseComponentType{"common"}
+	ComponentTypeController          ComponentType = baseComponentType{"controller"}
+	ComponentTypeAccounting          ComponentType = baseComponentType{"accounting"}
+	ComponentTypeREST                ComponentType = baseComponentType{"rest"}
+	ComponentTypeWorker              ComponentType = baseComponentType{"worker"}
+	ComponentTypeNodeSysctlDaemonSet ComponentType = baseComponentType{"node-sysctl-daemon-set"}
+	ComponentTypeLogin               ComponentType = baseComponentType{"login"}
+	ComponentTypeBenchmark           ComponentType = baseComponentType{"nccl-benchmark"}
+	ComponentTypePopulateJail        ComponentType = baseComponentType{"populate-jail"}
+	ComponentTypeExporter            ComponentType = baseComponentType{"exporter"}
+	ComponentTypeMariaDbOperator     ComponentType = baseComponentType{"mariadb-operator"}
 )

diff --git a/internal/render/worker/daemonset.go b/internal/render/worker/daemonset.go
index 504edbb4..761082ba 100644
--- a/internal/render/worker/daemonset.go
+++ b/internal/render/worker/daemonset.go
@@ -7,7 +7,6 @@ import (
 
 	slurmv1 "nebius.ai/slurm-operator/api/v1"
 	"nebius.ai/slurm-operator/internal/consts"
-	"nebius.ai/slurm-operator/internal/naming"
 	"nebius.ai/slurm-operator/internal/render/common"
 	"nebius.ai/slurm-operator/internal/utils"
 )
@@ -18,8 +17,8 @@ func RenderDaemonSet(
 	K8sNodeFilterName string,
 	nodeFilters []slurmv1.K8sNodeFilter,
 ) appsv1.DaemonSet {
-	labels := common.RenderLabels(consts.ComponentTypeWorker, clusterName)
-	matchLabels := common.RenderMatchLabels(consts.ComponentTypeWorker, clusterName)
+	labels := common.RenderLabels(consts.ComponentTypeNodeSysctlDaemonSet, clusterName)
+	matchLabels := common.RenderMatchLabels(consts.ComponentTypeNodeSysctlDaemonSet, clusterName)
 
 	nodeFilter := utils.MustGetBy(
 		nodeFilters,
@@ -46,14 +45,9 @@ func RenderDaemonSet(
 				Labels: labels,
 			},
 			Spec: corev1.PodSpec{
-				Affinity:     nodeFilter.Affinity,
-				NodeSelector: nodeFilter.NodeSelector,
-				Tolerations:  nodeFilter.Tolerations,
-				DNSConfig: &corev1.PodDNSConfig{
-					Searches: []string{
-						naming.BuildServiceFQDN(consts.ComponentTypeWorker, namespace, clusterName),
-					},
-				},
+				Affinity:       nodeFilter.Affinity,
+				NodeSelector:   nodeFilter.NodeSelector,
+				Tolerations:    nodeFilter.Tolerations,
 				InitContainers: initContainers,
 				Containers: []corev1.Container{
 					renderContainerNodeSysctlSleep(),

From 9f2bfc51d4f76cf097b345aceacbb5301be4ec64 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 20 Dec 2024 16:01:18 +0000
Subject: [PATCH 08/12] build(deps): bump
 github.com/onsi/ginkgo/v2 from 2.22.0 to 2.22.1

Bumps [github.com/onsi/ginkgo/v2](https://github.com/onsi/ginkgo) from 2.22.0 to 2.22.1.
- [Release notes](https://github.com/onsi/ginkgo/releases)
- [Changelog](https://github.com/onsi/ginkgo/blob/master/CHANGELOG.md)
- [Commits](https://github.com/onsi/ginkgo/compare/v2.22.0...v2.22.1)

---
updated-dependencies:
- dependency-name: github.com/onsi/ginkgo/v2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
---
 go.mod |  8 ++++----
 go.sum | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/go.mod b/go.mod
index fcfa0e78..0b0d60ed 100644
--- a/go.mod
+++ b/go.mod
@@ -6,7 +6,7 @@ toolchain go1.23.3
 
 require (
 	github.com/go-logr/logr v1.4.2
-	github.com/onsi/ginkgo/v2 v2.22.0
+	github.com/onsi/ginkgo/v2 v2.22.1
 	github.com/onsi/gomega v1.36.1
 	github.com/open-telemetry/opentelemetry-operator v0.103.0
 	github.com/pkg/errors v0.9.1
@@ -52,7 +52,7 @@ require (
 	github.com/google/gnostic-models v0.6.8 // indirect
 	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/gofuzz v1.2.0 // indirect
-	github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect
+	github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/imdario/mergo v0.3.16 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
@@ -75,13 +75,13 @@ require (
 	go.opentelemetry.io/otel/sdk/metric v1.28.0 // indirect
 	go.opentelemetry.io/otel/trace v1.28.0 // indirect
 	go.uber.org/multierr v1.11.0 // indirect
-	golang.org/x/net v0.30.0 // indirect
+	golang.org/x/net v0.32.0 // indirect
 	golang.org/x/oauth2 v0.22.0 // indirect
 	golang.org/x/sys v0.28.0 // indirect
 	golang.org/x/term v0.27.0 // indirect
 	golang.org/x/text v0.21.0 // indirect
 	golang.org/x/time v0.5.0 // indirect
-	golang.org/x/tools v0.26.0 // indirect
+	golang.org/x/tools v0.28.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
 	google.golang.org/protobuf v1.35.1 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect

diff --git a/go.sum b/go.sum
index 41649b31..5d7b3970 100644
--- a/go.sum
+++ b/go.sum
@@ -45,8 +45,8 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
 github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
-github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
-github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
+github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg=
+github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
@@ -72,8 +72,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
-github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg=
-github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
+github.com/onsi/ginkgo/v2 v2.22.1 h1:QW7tbJAUDyVDVOM5dFa7qaybo+CRfR7bemlQUN6Z8aM=
+github.com/onsi/ginkgo/v2 v2.22.1/go.mod h1:S6aTpoRsSq2cZOd+pssHAlKW/Q/jZt6cPrPlnj4a1xM=
 github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw=
 github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
 github.com/open-telemetry/opentelemetry-operator v0.103.0 h1:L0REMuJSMZjqCw7p7fWMn19XkiIULMr3NnHdPLryMQs=
@@ -144,8 +144,8 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
-golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
+golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
+golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
 golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA=
 golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -168,8 +168,8 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
-golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
+golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8=
+golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

From afd54f021a800c2ca9072275ac5631a5207e7c6d Mon Sep 17 00:00:00 2001
From: rdjjke
Date: Mon, 23 Dec 2024 12:38:56 +0100
Subject: [PATCH 09/12] MSP-3783: Support setting Slurm node 'extra' field

---
 api/v1/slurmcluster_types.go                        |  6 +++++
 .../bases/slurm.nebius.ai_slurmclusters.yaml        | 12 +++++-----
 .../templates/slurm-cluster-cr.yaml                 |  3 +++
 helm/slurm-cluster/values.yaml                      |  1 +
 .../templates/slurmcluster-crd.yaml                 | 12 +++++-----
 helm/soperator/crds/slurmcluster-crd.yaml           | 12 +++++-----
 images/worker/slurmd_entrypoint.sh                  |  6 +++++
 internal/render/worker/container.go                 | 22 +++++++++++++++++--
 internal/render/worker/container_test.go            | 10 ++++++++-
 internal/render/worker/statefulset.go               |  1 +
 internal/values/slurm_worker.go                     |  6 +++--
 11 files changed, 65 insertions(+), 26 deletions(-)

diff --git a/api/v1/slurmcluster_types.go b/api/v1/slurmcluster_types.go
index 852f0bf5..c5181cf0 100644
--- a/api/v1/slurmcluster_types.go
+++ b/api/v1/slurmcluster_types.go
@@ -658,6 +658,12 @@ type SlurmNodeWorker struct {
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default=false
 	EnableGDRCopy bool `json:"enableGDRCopy,omitempty"`
+
+	// SlurmNodeExtra defines the string that will be set to the "Extra" field of the corresponding Slurm node. It can
+	// use any environment variables that are available in the slurmd container when it starts.
+	//
+	// +kubebuilder:validation:Optional
+	SlurmNodeExtra string `json:"slurmNodeExtra,omitempty"`
 }
 
 // SlurmNodeWorkerVolumes defines the volumes for the Slurm worker node

diff --git a/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml b/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
index 91db34d5..48c43b23 100644
--- a/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
+++ b/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
@@ -1477,7 +1477,6 @@ spec:
               defMemPerNode: 1228800
               maxJobCount: 10000
               minJobAge: 86400
-              taskPlugin: task/cgroup,task/affinity
               taskPluginParam: Verbose
             description: SlurmConfig represents the Slurm configuration in slurm.conf.
              Not all options are supported.
@@ -1516,12 +1515,6 @@ spec:
                   time
                 format: int32
                 type: integer
-              taskPlugin:
-                default: task/cgroup,task/affinity
-                description: Identifies the type of task launch plugin (e.g. pinning
-                  tasks to specific processors)
-                pattern: ^((task/affinity|task/cgroup|task/none)(,)?)+$
-                type: string
               taskPluginParam:
                 default: Verbose
                 description: Additional parameters for the task plugin
@@ -3805,6 +3798,11 @@ spec:
                   description: Size defines the number of node instances
                   format: int32
                   type: integer
+                slurmNodeExtra:
+                  description: |-
+                    SlurmNodeExtra defines the string that will be set to the "Extra" field of the corresponding Slurm node. It can
+                    use any environment variables that are available in the slurmd container when it starts.
+                  type: string
                 slurmd:
                   description: Slurmd represents the Slurm daemon service configuration
                  properties:

diff --git a/helm/slurm-cluster/templates/slurm-cluster-cr.yaml b/helm/slurm-cluster/templates/slurm-cluster-cr.yaml
index cd4569af..8b9b809a 100644
--- a/helm/slurm-cluster/templates/slurm-cluster-cr.yaml
+++ b/helm/slurm-cluster/templates/slurm-cluster-cr.yaml
@@ -157,6 +157,9 @@ spec:
     worker:
       cgroupVersion: {{ .Values.slurmNodes.worker.cgroupVersion | quote }}
      enableGDRCopy: {{ default false .Values.slurmNodes.worker.enableGDRCopy }}
+      {{- if .Values.slurmNodes.worker.slurmNodeExtra }}
+      slurmNodeExtra: {{ .Values.slurmNodes.worker.slurmNodeExtra | quote }}
+      {{- end }}
      size: {{ required ".Values.slurmNodes.worker.size must be provided." .Values.slurmNodes.worker.size }}
      k8sNodeFilterName: {{ required ".Values.slurmNodes.worker.k8sNodeFilterName must be provided." .Values.slurmNodes.worker.k8sNodeFilterName | quote }}
      {{- if .Values.slurmNodes.worker.supervisordConfigMapRefName }}

diff --git a/helm/slurm-cluster/values.yaml b/helm/slurm-cluster/values.yaml
index 02736886..3c9fa08e 100644
--- a/helm/slurm-cluster/values.yaml
+++ b/helm/slurm-cluster/values.yaml
@@ -275,6 +275,7 @@ slurmNodes:
     k8sNodeFilterName: "gpu"
     cgroupVersion: v2
     enableGDRCopy: false
+    slurmNodeExtra: ""
     supervisordConfigMapRefName: ""
     slurmd:
       imagePullPolicy: "IfNotPresent"

diff --git a/helm/soperator-crds/templates/slurmcluster-crd.yaml b/helm/soperator-crds/templates/slurmcluster-crd.yaml
index fcc7d516..a1db1769 100644
--- a/helm/soperator-crds/templates/slurmcluster-crd.yaml
+++ b/helm/soperator-crds/templates/slurmcluster-crd.yaml
@@ -1476,7 +1476,6 @@ spec:
               defMemPerNode: 1228800
               maxJobCount: 10000
               minJobAge: 86400
-              taskPlugin: task/cgroup,task/affinity
               taskPluginParam: Verbose
             description: SlurmConfig represents the Slurm configuration in slurm.conf.
              Not all options are supported.
@@ -1515,12 +1514,6 @@ spec:
                   time
                 format: int32
                 type: integer
-              taskPlugin:
-                default: task/cgroup,task/affinity
-                description: Identifies the type of task launch plugin (e.g. pinning
-                  tasks to specific processors)
-                pattern: ^((task/affinity|task/cgroup|task/none)(,)?)+$
-                type: string
               taskPluginParam:
                 default: Verbose
                 description: Additional parameters for the task plugin
@@ -3804,6 +3797,11 @@ spec:
                   description: Size defines the number of node instances
                   format: int32
                   type: integer
+                slurmNodeExtra:
+                  description: |-
+                    SlurmNodeExtra defines the string that will be set to the "Extra" field of the corresponding Slurm node. It can
+                    use any environment variables that are available in the slurmd container when it starts.
+                  type: string
                 slurmd:
                   description: Slurmd represents the Slurm daemon service configuration
                  properties:

diff --git a/helm/soperator/crds/slurmcluster-crd.yaml b/helm/soperator/crds/slurmcluster-crd.yaml
index fcc7d516..a1db1769 100644
--- a/helm/soperator/crds/slurmcluster-crd.yaml
+++ b/helm/soperator/crds/slurmcluster-crd.yaml
@@ -1476,7 +1476,6 @@ spec:
               defMemPerNode: 1228800
               maxJobCount: 10000
               minJobAge: 86400
-              taskPlugin: task/cgroup,task/affinity
               taskPluginParam: Verbose
             description: SlurmConfig represents the Slurm configuration in slurm.conf.
              Not all options are supported.
@@ -1515,12 +1514,6 @@ spec:
                   time
                 format: int32
                 type: integer
-              taskPlugin:
-                default: task/cgroup,task/affinity
-                description: Identifies the type of task launch plugin (e.g. pinning
-                  tasks to specific processors)
-                pattern: ^((task/affinity|task/cgroup|task/none)(,)?)+$
-                type: string
               taskPluginParam:
                 default: Verbose
                 description: Additional parameters for the task plugin
@@ -3804,6 +3797,11 @@ spec:
                   description: Size defines the number of node instances
                   format: int32
                   type: integer
+                slurmNodeExtra:
+                  description: |-
+                    SlurmNodeExtra defines the string that will be set to the "Extra" field of the corresponding Slurm node. It can
+                    use any environment variables that are available in the slurmd container when it starts.
+                  type: string
                 slurmd:
                   description: Slurmd represents the Slurm daemon service configuration
                   properties:

diff --git a/images/worker/slurmd_entrypoint.sh b/images/worker/slurmd_entrypoint.sh
index b329f52c..4a506ff7 100755
--- a/images/worker/slurmd_entrypoint.sh
+++ b/images/worker/slurmd_entrypoint.sh
@@ -1,10 +1,16 @@
 #!/bin/bash
 
+set -e # Exit immediately if any command returns a non-zero error code
+
+echo "Evaluate variables in the Slurm node 'Extra' field"
+evaluated_extra=$(eval echo "$SLURM_NODE_EXTRA")
+
 echo "Start slurmd daemon"
 exec /usr/sbin/slurmd \
     -D \
     -Z \
     --instance-id "${INSTANCE_ID}" \
+    --extra "${evaluated_extra}" \
     --conf \
     "NodeHostname=${K8S_POD_NAME} NodeAddr=${K8S_POD_NAME}.${K8S_SERVICE_NAME}.${K8S_POD_NAMESPACE}.svc.cluster.local RealMemory=${SLURM_REAL_MEMORY} Gres=${GRES}" \
     2>&1 | tee >(multilog s100000000 n5 /var/log/slurm/multilog)

diff --git a/internal/render/worker/container.go b/internal/render/worker/container.go
index df5f7373..d39cbe18 100644
--- a/internal/render/worker/container.go
+++ b/internal/render/worker/container.go
@@ -55,6 +55,7 @@ func renderContainerSlurmd(
 	clusterType consts.ClusterType,
 	cgroupVersion string,
 	enableGDRCopy bool,
+	slurmNodeExtra string,
 ) (corev1.Container, error) {
 	volumeMounts := []corev1.VolumeMount{
 		common.RenderVolumeMountSlurmConfigs(),
@@ -90,7 +91,14 @@ func renderContainerSlurmd(
 		Name:            consts.ContainerNameSlurmd,
 		Image:           container.Image,
 		ImagePullPolicy: container.ImagePullPolicy,
-		Env:             renderSlurmdEnv(clusterName, cgroupVersion, clusterType, realMemory, enableGDRCopy),
+		Env: renderSlurmdEnv(
+			clusterName,
+			cgroupVersion,
+			clusterType,
+			realMemory,
+			enableGDRCopy,
+			slurmNodeExtra,
+		),
 		Ports: []corev1.ContainerPort{{
 			Name:          container.Name,
 			ContainerPort: container.Port,
@@ -133,7 +141,13 @@ func renderVolumeMountSupervisordConfigMap() corev1.VolumeMount {
 	}
 }
 
-func renderSlurmdEnv(clusterName, cgroupVersion string, clusterType consts.ClusterType, realMemory int64, enableGDRCopy bool) []corev1.EnvVar {
+func renderSlurmdEnv(
+	clusterName, cgroupVersion string,
+	clusterType consts.ClusterType,
+	realMemory int64,
+	enableGDRCopy bool,
+	slurmNodeExtra string,
+) []corev1.EnvVar {
 	envVar := []corev1.EnvVar{
 		{
 			Name: "K8S_POD_NAME",
@@ -171,6 +185,10 @@ func renderSlurmdEnv(
 			Name:  "SLURM_REAL_MEMORY",
 			Value: strconv.FormatInt(realMemory, 10),
 		},
+		{
+			Name:  "SLURM_NODE_EXTRA",
+			Value: slurmNodeExtra,
+		},
 	}
 	if cgroupVersion == consts.CGroupV2 {
 		envVar = append(envVar, corev1.EnvVar{

diff --git a/internal/render/worker/container_test.go b/internal/render/worker/container_test.go
index 44ddeea8..d6ce6b68 100644
--- a/internal/render/worker/container_test.go
+++ b/internal/render/worker/container_test.go
@@ -65,7 +65,15 @@ func Test_RenderContainerSlurmd(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got, err := renderContainerSlurmd(tt.container, nil, "test-cluster", consts.ClusterTypeGPU, "v1", false)
+			got, err := renderContainerSlurmd(
+				tt.container,
+				nil,
+				"test-cluster",
+				consts.ClusterTypeGPU,
+				"v1",
+				false,
+				"{ \"monitoring\": \"https://my-cloud.com/$INSTANCE_ID/monitoring\" }",
+			)
 			if err != nil && tt.wantLimits != nil {
 				t.Errorf("renderContainerSlurmd() error = %v, want nil", err)
 			}

diff --git a/internal/render/worker/statefulset.go b/internal/render/worker/statefulset.go
index b468f4ef..980816f1 100644
--- a/internal/render/worker/statefulset.go
+++ b/internal/render/worker/statefulset.go
@@ -66,6 +66,7 @@ func RenderStatefulSet(
 		clusterType,
 		worker.CgroupVersion,
 		worker.EnableGDRCopy,
+		worker.SlurmNodeExtra,
 	)
 	if err != nil {
 		return appsv1.StatefulSet{}, fmt.Errorf("rendering slurmd container: %w", err)

diff --git a/internal/values/slurm_worker.go b/internal/values/slurm_worker.go
index 73501f15..731893c5 100644
--- a/internal/values/slurm_worker.go
+++ b/internal/values/slurm_worker.go
@@ -21,8 +21,9 @@ type SlurmWorker struct {
 	SupervisordConfigMapDefault bool
 	SupervisordConfigMapName    string
 
-	CgroupVersion string
-	EnableGDRCopy bool
+	CgroupVersion  string
+	EnableGDRCopy  bool
+	SlurmNodeExtra string
 
 	Service     Service
 	StatefulSet StatefulSet
@@ -73,6 +74,7 @@ func buildSlurmWorkerFrom(
 		SharedMemorySize: worker.Volumes.SharedMemorySize,
 		CgroupVersion:    worker.CgroupVersion,
 		EnableGDRCopy:    worker.EnableGDRCopy,
+		SlurmNodeExtra:   worker.SlurmNodeExtra,
 	}
 	for _, jailSubMount := range worker.Volumes.JailSubMounts {
 		subMount := *jailSubMount.DeepCopy()

From 16a64574ffa72b55c33510ee956826d6aa582848 Mon Sep 17 00:00:00 2001
From: Pavel Sofronii
Date: Mon, 23 Dec 2024 13:03:09 +0100
Subject: [PATCH 10/12] SSHDClientAliveCountMax 10

---
 internal/consts/sshd.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/consts/sshd.go b/internal/consts/sshd.go
index e994551f..bf67dfd1 100644
--- a/internal/consts/sshd.go
+++ b/internal/consts/sshd.go
@@ -4,7 +4,7 @@ package consts
 
 const (
 	SSHDClientAliveInterval = "9000" // 30 minute
-	SSHDClientAliveCountMax = "3"
+	SSHDClientAliveCountMax = "10"
 	SSHDMaxStartups         = "10:30:60"
 	SSHDLoginGraceTime      = "9000"
 	SSHDMaxAuthTries        = "4"

From 3a97a35102aded8f502ffaf3a669042dd7e7be8d Mon Sep 17 00:00:00 2001
From: Uburro
Date: Mon, 23 Dec 2024 15:48:45 +0100
Subject: [PATCH 11/12] MSP-3782: fix bug in slurm.conf

---
 api/v1/slurmcluster_types.go                        | 37 ++++----
 api/v1/zz_generated.deepcopy.go                     | 94 ++++++++++++++++++-
 .../bases/slurm.nebius.ai_slurmclusters.yaml        |  3 +-
 .../templates/slurmcluster-crd.yaml                 |  3 +-
 helm/soperator/crds/slurmcluster-crd.yaml           |  3 +-
 internal/render/common/configmap.go                 | 81 ++++++++--------
 6 files changed, 156 insertions(+), 65 deletions(-)

diff --git a/api/v1/slurmcluster_types.go b/api/v1/slurmcluster_types.go
index c5181cf0..fcb920fb 100644
--- a/api/v1/slurmcluster_types.go
+++ b/api/v1/slurmcluster_types.go
@@ -87,39 +87,39 @@ type SlurmConfig struct {
 	//
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default=1228800
-	DefMemPerNode int32 `json:"defMemPerNode,omitempty"`
+	DefMemPerNode *int32 `json:"defMemPerNode,omitempty"`
 	// Default count of CPUs allocated per allocated GPU
 	//
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default=16
-	DefCpuPerGPU int32 `json:"defCpuPerGPU,omitempty"`
+	DefCpuPerGPU *int32 `json:"defCpuPerGPU,omitempty"`
 	// The time to wait, in seconds, when any job is in the COMPLETING state before any additional jobs are scheduled.
 	//
 	// +kubebuilder:validation:Optional
 	// +kubebuilder:default=5
-	CompleteWait int32 `json:"completeWait,omitempty"`
+	CompleteWait *int32 `json:"completeWait,omitempty"`
 	// Defines specific subsystems which should provide more detailed event logging.
// // +kubebuilder:validation:Optional // +kubebuilder:default="Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs" // +kubebuilder:validation:Pattern="^((Accrue|Agent|AuditRPCs|Backfill|BackfillMap|BurstBuffer|Cgroup|ConMgr|CPU_Bind|CpuFrequency|Data|DBD_Agent|Dependency|Elasticsearch|Energy|Federation|FrontEnd|Gres|Hetjob|Gang|GLOB_SILENCE|JobAccountGather|JobComp|JobContainer|License|Network|NetworkRaw|NodeFeatures|NO_CONF_HASH|Power|Priority|Profile|Protocol|Reservation|Route|Script|SelectType|Steps|Switch|TLS|TraceJobs|Triggers)(,)?)+$" - DebugFlags string `json:"debugFlags,omitempty"` + DebugFlags *string `json:"debugFlags,omitempty"` // Additional parameters for the task plugin // // +kubebuilder:validation:Optional // +kubebuilder:default="Verbose" // +kubebuilder:validation:Pattern="^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$" - TaskPluginParam string `json:"taskPluginParam,omitempty"` + TaskPluginParam *string `json:"taskPluginParam,omitempty"` // Keep N last jobs in controller memory // // +kubebuilder:validation:Optional // +kubebuilder:default=10000 - MaxJobCount int32 `json:"maxJobCount,omitempty"` + MaxJobCount *int32 `json:"maxJobCount,omitempty"` // Don't remove jobs from controller memory after some time // // +kubebuilder:validation:Optional // +kubebuilder:default=86400 - MinJobAge int32 `json:"minJobAge,omitempty"` + MinJobAge *int32 `json:"minJobAge,omitempty"` } type PartitionConfiguration struct { @@ -560,33 +560,32 @@ type SlurmdbdConfig struct { type AccountingSlurmConf struct { // +kubebuilder:validation:Optional - AccountingStorageTRES string `json:"accountingStorageTRES,omitempty"` + AccountingStorageTRES *string `json:"accountingStorageTRES,omitempty"` // +kubebuilder:validation:Optional - AccountingStoreFlags string `json:"accountingStoreFlags,omitempty"` + AccountingStoreFlags *string `json:"accountingStoreFlags,omitempty"` // +kubebuilder:validation:Optional - AcctGatherInterconnectType string `json:"acctGatherInterconnectType,omitempty"` + AcctGatherInterconnectType *string `json:"acctGatherInterconnectType,omitempty"` // +kubebuilder:validation:Optional - AcctGatherFilesystemType string `json:"acctGatherFilesystemType,omitempty"` + AcctGatherFilesystemType *string `json:"acctGatherFilesystemType,omitempty"` // +kubebuilder:validation:Optional - AcctGatherProfileType string `json:"acctGatherProfileType,omitempty"` + AcctGatherProfileType *string `json:"acctGatherProfileType,omitempty"` // +kubebuilder:validation:Optional // +kubebuilder:validation:Enum="jobacct_gather/linux";"jobacct_gather/cgroup";"jobacct_gather/none" - JobAcctGatherType string `json:"jobAcctGatherType,omitempty"` + JobAcctGatherType *string `json:"jobAcctGatherType,omitempty"` // +kubebuilder:validation:Optional // +kubebuilder:default=30 - JobAcctGatherFrequency int `json:"jobAcctGatherFrequency,omitempty"` + JobAcctGatherFrequency *int `json:"jobAcctGatherFrequency,omitempty"` // +kubebuilder:validation:Optional // +kubebuilder:validation:Enum="NoShared";"UsePss";"OverMemoryKill";"DisableGPUAcct" - JobAcctGatherParams string `json:"jobAcctGatherParams,omitempty"` + JobAcctGatherParams *string `json:"jobAcctGatherParams,omitempty"` // +kubebuilder:validation:Optional // +kubebuilder:default=0 - PriorityWeightAge int16 `json:"priorityWeightAge,omitempty"` + PriorityWeightAge *int16 `json:"priorityWeightAge,omitempty"` // +kubebuilder:validation:Optional // +kubebuilder:default=0 - PriorityWeightFairshare int16 
-	PriorityWeightFairshare int16 `json:"priorityWeightFairshare,omitempty"`
+	PriorityWeightFairshare *int16 `json:"priorityWeightFairshare,omitempty"`
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:default=0
-	PriorityWeightTRES int16 `json:"priorityWeightTRES,omitempty"`
+	PriorityWeightTRES *string `json:"priorityWeightTRES,omitempty"`
 }
 
 // SlurmNodeController defines the configuration for the Slurm controller node
diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go
index 6ccc3853..56d5d2a1 100644
--- a/api/v1/zz_generated.deepcopy.go
+++ b/api/v1/zz_generated.deepcopy.go
@@ -15,6 +15,61 @@ import (
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *AccountingSlurmConf) DeepCopyInto(out *AccountingSlurmConf) {
 	*out = *in
+	if in.AccountingStorageTRES != nil {
+		in, out := &in.AccountingStorageTRES, &out.AccountingStorageTRES
+		*out = new(string)
+		**out = **in
+	}
+	if in.AccountingStoreFlags != nil {
+		in, out := &in.AccountingStoreFlags, &out.AccountingStoreFlags
+		*out = new(string)
+		**out = **in
+	}
+	if in.AcctGatherInterconnectType != nil {
+		in, out := &in.AcctGatherInterconnectType, &out.AcctGatherInterconnectType
+		*out = new(string)
+		**out = **in
+	}
+	if in.AcctGatherFilesystemType != nil {
+		in, out := &in.AcctGatherFilesystemType, &out.AcctGatherFilesystemType
+		*out = new(string)
+		**out = **in
+	}
+	if in.AcctGatherProfileType != nil {
+		in, out := &in.AcctGatherProfileType, &out.AcctGatherProfileType
+		*out = new(string)
+		**out = **in
+	}
+	if in.JobAcctGatherType != nil {
+		in, out := &in.JobAcctGatherType, &out.JobAcctGatherType
+		*out = new(string)
+		**out = **in
+	}
+	if in.JobAcctGatherFrequency != nil {
+		in, out := &in.JobAcctGatherFrequency, &out.JobAcctGatherFrequency
+		*out = new(int)
+		**out = **in
+	}
+	if in.JobAcctGatherParams != nil {
+		in, out := &in.JobAcctGatherParams, &out.JobAcctGatherParams
+		*out = new(string)
+		**out = **in
+	}
+	if in.PriorityWeightAge != nil {
+		in, out := &in.PriorityWeightAge, &out.PriorityWeightAge
+		*out = new(int16)
+		**out = **in
+	}
+	if in.PriorityWeightFairshare != nil {
+		in, out := &in.PriorityWeightFairshare, &out.PriorityWeightFairshare
+		*out = new(int16)
+		**out = **in
+	}
+	if in.PriorityWeightTRES != nil {
+		in, out := &in.PriorityWeightTRES, &out.PriorityWeightTRES
+		*out = new(string)
+		**out = **in
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AccountingSlurmConf.
@@ -517,7 +572,7 @@ func (in *SlurmClusterSpec) DeepCopyInto(out *SlurmClusterSpec) {
 		(*in).DeepCopyInto(*out)
 	}
 	in.PartitionConfiguration.DeepCopyInto(&out.PartitionConfiguration)
-	out.SlurmConfig = in.SlurmConfig
+	in.SlurmConfig.DeepCopyInto(&out.SlurmConfig)
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SlurmClusterSpec.
@@ -560,6 +615,41 @@ func (in *SlurmClusterStatus) DeepCopy() *SlurmClusterStatus {
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *SlurmConfig) DeepCopyInto(out *SlurmConfig) {
 	*out = *in
+	if in.DefMemPerNode != nil {
+		in, out := &in.DefMemPerNode, &out.DefMemPerNode
+		*out = new(int32)
+		**out = **in
+	}
+	if in.DefCpuPerGPU != nil {
+		in, out := &in.DefCpuPerGPU, &out.DefCpuPerGPU
+		*out = new(int32)
+		**out = **in
+	}
+	if in.CompleteWait != nil {
+		in, out := &in.CompleteWait, &out.CompleteWait
+		*out = new(int32)
+		**out = **in
+	}
+	if in.DebugFlags != nil {
+		in, out := &in.DebugFlags, &out.DebugFlags
+		*out = new(string)
+		**out = **in
+	}
+	if in.TaskPluginParam != nil {
+		in, out := &in.TaskPluginParam, &out.TaskPluginParam
+		*out = new(string)
+		**out = **in
+	}
+	if in.MaxJobCount != nil {
+		in, out := &in.MaxJobCount, &out.MaxJobCount
+		*out = new(int32)
+		**out = **in
+	}
+	if in.MinJobAge != nil {
+		in, out := &in.MinJobAge, &out.MinJobAge
+		*out = new(int32)
+		**out = **in
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SlurmConfig.
@@ -632,7 +722,7 @@ func (in *SlurmNodeAccounting) DeepCopyInto(out *SlurmNodeAccounting) {
 	out.ExternalDB = in.ExternalDB
 	in.MariaDbOperator.DeepCopyInto(&out.MariaDbOperator)
 	out.SlurmdbdConfig = in.SlurmdbdConfig
-	out.SlurmConfig = in.SlurmConfig
+	in.SlurmConfig.DeepCopyInto(&out.SlurmConfig)
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SlurmNodeAccounting.
diff --git a/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml b/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
index 48c43b23..9430a7fa 100644
--- a/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
+++ b/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
@@ -2077,8 +2077,7 @@ spec:
                         default: 0
                         type: integer
                       priorityWeightTRES:
-                        default: 0
-                        type: integer
+                        type: string
                     type: object
                   slurmdbd:
                     description: Slurmdbd represents the Slurm database daemon
diff --git a/helm/soperator-crds/templates/slurmcluster-crd.yaml b/helm/soperator-crds/templates/slurmcluster-crd.yaml
index a1db1769..1c569c07 100644
--- a/helm/soperator-crds/templates/slurmcluster-crd.yaml
+++ b/helm/soperator-crds/templates/slurmcluster-crd.yaml
@@ -2076,8 +2076,7 @@ spec:
                         default: 0
                         type: integer
                       priorityWeightTRES:
-                        default: 0
-                        type: integer
+                        type: string
                     type: object
                   slurmdbd:
                     description: Slurmdbd represents the Slurm database daemon
diff --git a/helm/soperator/crds/slurmcluster-crd.yaml b/helm/soperator/crds/slurmcluster-crd.yaml
index a1db1769..1c569c07 100644
--- a/helm/soperator/crds/slurmcluster-crd.yaml
+++ b/helm/soperator/crds/slurmcluster-crd.yaml
@@ -2076,8 +2076,7 @@ spec:
                         default: 0
                         type: integer
                       priorityWeightTRES:
-                        default: 0
-                        type: integer
+                        type: string
                     type: object
                   slurmdbd:
                     description: Slurmdbd represents the Slurm database daemon
diff --git a/internal/render/common/configmap.go b/internal/render/common/configmap.go
index 3655edab..c834dbcc 100644
--- a/internal/render/common/configmap.go
+++ b/internal/render/common/configmap.go
@@ -56,19 +56,7 @@ func generateSlurmConfig(cluster *values.SlurmCluster) renderutils.ConfigFile {
 	res.AddProperty("CredType", "cred/"+consts.Munge)
 	res.AddComment("")
 	res.AddComment("SlurnConfig Spec")
-	v := reflect.ValueOf(cluster.SlurmConfig)
-	t := reflect.TypeOf(cluster.SlurmConfig)
-
-	for i := 0; i < v.NumField(); i++ {
-		field := v.Field(i)
-		fieldName := t.Field(i).Name
-
-		if field.Kind() == reflect.String && field.String() == "" {
-			continue
-		}
-
-		res.AddProperty(fieldName, field.Interface())
-	}
+	addSlurmConfigProperties(res, cluster.SlurmConfig)
 	res.AddComment("")
 	if cluster.ClusterType == consts.ClusterTypeGPU {
 		res.AddProperty("GresTypes", "gpu")
@@ -97,6 +85,7 @@ func generateSlurmConfig(cluster *values.SlurmCluster) renderutils.ConfigFile {
 	res.AddProperty("LaunchParameters", "use_interactive_step")
 	res.AddComment("Scrontab")
 	res.AddProperty("ScronParameters", "enable,explicit_scancel")
+	res.AddComment("")
 	res.AddProperty("PropagateResourceLimits", "NONE") // Don't propagate ulimits from the login node by default
 	res.AddComment("")
 	res.AddComment("HEALTH CHECKS")
@@ -154,21 +143,8 @@ func generateSlurmConfig(cluster *values.SlurmCluster) renderutils.ConfigFile {
 
 	// In slurm.conf, the accounting section has many optional values
 	// that can be added or removed, and to avoid writing many if statements, we decided to use a reflector.
-	v := reflect.ValueOf(cluster.NodeAccounting.SlurmConfig)
-	typeOfS := v.Type()
-	for i := 0; i < v.NumField(); i++ {
-		field := v.Field(i)
-		if !isZero(field) {
-			key := typeOfS.Field(i).Name
-			switch field.Kind() {
-			case reflect.String:
-				res.AddProperty(key, field.String())
-			case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
-				res.AddProperty(key, field.Int())
-			}
+	addSlurmConfigProperties(res, cluster.NodeAccounting.SlurmConfig)
 
-		}
-	}
 	if cluster.NodeRest.Enabled {
 		res.AddComment("")
 		res.AddComment("REST API")
@@ -179,6 +155,46 @@ func generateSlurmConfig(cluster *values.SlurmCluster) renderutils.ConfigFile {
 	return res
 }
 
+// addSlurmConfigProperties adds properties from the given struct to the config file
+func addSlurmConfigProperties(res *renderutils.PropertiesConfig, config interface{}) {
+	v := reflect.ValueOf(config)
+	if v.Kind() == reflect.Pointer {
+		if v.IsNil() {
+			return
+		}
+		v = v.Elem()
+	}
+
+	if v.Kind() != reflect.Struct {
+		return
+	}
+
+	t := v.Type()
+	for i := 0; i < v.NumField(); i++ {
+		field := v.Field(i)
+		fieldName := t.Field(i).Name
+
+		if field.Kind() == reflect.Pointer {
+			if field.IsNil() {
+				continue
+			}
+			field = field.Elem()
+		}
+
+		if field.Kind() == reflect.String {
+			if field.String() != "" {
+				res.AddProperty(fieldName, field.String())
+			}
+			continue
+		}
+
+		if field.Kind() == reflect.Int32 || field.Kind() == reflect.Int16 {
+			res.AddProperty(fieldName, field.Int())
+			continue
+		}
+	}
+}
+
 func generateCGroupConfig(cluster *values.SlurmCluster) renderutils.ConfigFile {
 	res := &renderutils.PropertiesConfig{}
 	res.AddProperty("CgroupMountpoint", "/sys/fs/cgroup")
@@ -214,17 +230,6 @@ func generateGresConfig(clusterType consts.ClusterType) renderutils.ConfigFile {
 	return res
 }
 
-func isZero(v reflect.Value) bool {
-	switch v.Kind() {
-	case reflect.String:
-		return v.Len() == 0
-	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
-		return v.Int() == 0
-	}
-
-	return false
-}
-
 // region Security limits
 
 // RenderConfigMapSecurityLimits renders new [corev1.ConfigMap] containing security limits config file

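A note on PATCH 11: moving the SlurmConfig and AccountingSlurmConf fields from values to pointers is what actually fixes the slurm.conf rendering bug. With a plain int32, an omitted completeWait and an explicit completeWait: 0 both decode to Go's zero value, so the old isZero filter could not tell "unset" apart from "set to zero" and silently dropped legitimate zeros. With *int32, nil means "unset, let Slurm fall back to its default", and any non-nil pointer is rendered, zero included. A minimal, self-contained Go sketch of the pattern (PropertiesConfig here is an illustrative stub, not the repo's renderutils type, and the field set is abbreviated):

package main

import (
	"fmt"
	"reflect"
)

// PropertiesConfig is a tiny stand-in for the repo's renderutils.PropertiesConfig.
type PropertiesConfig struct{ lines []string }

// AddProperty records one "Key=value" line of a properties-style config file.
func (p *PropertiesConfig) AddProperty(key string, value interface{}) {
	p.lines = append(p.lines, fmt.Sprintf("%s=%v", key, value))
}

// Optional settings are pointers: nil means "unset", while a non-nil
// pointer to the zero value means "explicitly set to zero".
type SlurmConfig struct {
	CompleteWait *int32
	MaxJobCount  *int32
	DebugFlags   *string
}

// addProperties mirrors the nil-skipping walk done by addSlurmConfigProperties.
func addProperties(res *PropertiesConfig, config interface{}) {
	v := reflect.ValueOf(config)
	t := v.Type()
	for i := 0; i < v.NumField(); i++ {
		field := v.Field(i)
		if field.Kind() == reflect.Pointer {
			if field.IsNil() {
				continue // unset: emit nothing, let Slurm apply its default
			}
			field = field.Elem()
		}
		res.AddProperty(t.Field(i).Name, field.Interface())
	}
}

func main() {
	zero := int32(0)
	cfg := SlurmConfig{CompleteWait: &zero} // explicit zero; the rest unset
	res := &PropertiesConfig{}
	addProperties(res, cfg)
	fmt.Println(res.lines) // prints [CompleteWait=0]
}

Running the sketch prints [CompleteWait=0]: the explicit zero survives into the rendered config, while the untouched fields produce no lines at all. The same reasoning explains why PriorityWeightTRES became a *string: in Slurm it is a comma-separated list of TRES=weight pairs (for example "CPU=1000,Mem=2000"), so an integer type with a default of 0 was doubly wrong.
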
From 83cb53e09d3e817c1a148688ad3e960fc8265f41 Mon Sep 17 00:00:00 2001
From: Pavel Sofronii
Date: Mon, 23 Dec 2024 17:20:02 +0100
Subject: [PATCH 12/12] bump soperator 1.16.1

---
 VERSION                               |  2 +-
 config/manager/kustomization.yaml     |  2 +-
 config/manager/manager.yaml           |  2 +-
 helm/slurm-cluster-storage/Chart.yaml |  4 ++--
 helm/slurm-cluster/Chart.yaml         |  4 ++--
 helm/slurm-cluster/values.yaml        | 32 +++++++++++++--------------
 helm/soperator-crds/Chart.yaml        |  4 ++--
 helm/soperator/Chart.yaml             |  4 ++--
 helm/soperator/values.yaml            |  2 +-
 internal/consts/version.go            |  2 +-
 10 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/VERSION b/VERSION
index 15b989e3..41c11ffb 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.16.0
+1.16.1
diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml
index 996370df..248bfe16 100644
--- a/config/manager/kustomization.yaml
+++ b/config/manager/kustomization.yaml
@@ -3,4 +3,4 @@ resources:
 images:
 - name: controller
   newName: cr.eu-north1.nebius.cloud/soperator/slurm-operator
-  newTag: 1.16.0
+  newTag: 1.16.1
diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml
index 4c7d922d..8c539ef9 100644
--- a/config/manager/manager.yaml
+++ b/config/manager/manager.yaml
@@ -84,7 +84,7 @@ spec:
           value: "false"
         - name: SLURM_OPERATOR_WATCH_NAMESPACES
           value: "*"
-        image: controller:1.16.0
+        image: controller:1.16.1
         imagePullPolicy: Always
         name: manager
         securityContext:
diff --git a/helm/slurm-cluster-storage/Chart.yaml b/helm/slurm-cluster-storage/Chart.yaml
index 4f0c01a9..ee7a02d6 100644
--- a/helm/slurm-cluster-storage/Chart.yaml
+++ b/helm/slurm-cluster-storage/Chart.yaml
@@ -2,5 +2,5 @@ apiVersion: v2
 name: helm-slurm-cluster-storage
 description: A Helm chart for Kubernetes
 type: application
-version: "1.16.0"
-appVersion: "1.16.0"
+version: "1.16.1"
+appVersion: "1.16.1"
diff --git a/helm/slurm-cluster/Chart.yaml b/helm/slurm-cluster/Chart.yaml
index 8fe0fa2a..b3966f7f 100644
--- a/helm/slurm-cluster/Chart.yaml
+++ b/helm/slurm-cluster/Chart.yaml
@@ -2,6 +2,6 @@ apiVersion: v2
 name: helm-slurm-cluster
 description: A Helm chart for Kubernetes
 type: application
-version: "1.16.0"
-appVersion: "1.16.0"
+version: "1.16.1"
+appVersion: "1.16.1"
 kubeVersion: ">=1.29.0-0"
diff --git a/helm/slurm-cluster/values.yaml b/helm/slurm-cluster/values.yaml
index 3c9fa08e..3e4c1a01 100644
--- a/helm/slurm-cluster/values.yaml
+++ b/helm/slurm-cluster/values.yaml
@@ -117,13 +117,13 @@ periodicChecks:
     imagePullPolicy: "IfNotPresent"
     appArmorProfile: "unconfined"
 slurmConfig: {}
-  # defMemPerNode: 1228800
-  # defCpuPerGPU: 16
-  # completeWait: 5
-  # debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs"
-  # taskPluginParam: "Verbose"
-  # maxJobCount: 10000
-  # minJobAge: 86400
+# defMemPerNode: 1228800
+# defCpuPerGPU: 16
+# completeWait: 5
+# debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs"
+# taskPluginParam: "Verbose"
+# maxJobCount: 10000
+# minJobAge: 86400
 slurmNodes:
   accounting:
     enabled: false
@@ -393,13 +393,13 @@ telemetry: {}
 #   otelCollectorPort: 8429
 
 images:
-  slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.16.0-jammy-slurm24.05.2"
-  slurmrestd: "cr.eu-north1.nebius.cloud/soperator/slurmrestd:1.16.0-jammy-slurm24.05.2"
-  slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.16.0-jammy-slurm24.05.2"
-  sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.16.0-jammy-slurm24.05.2"
-  munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.16.0-jammy-slurm24.05.2"
-  populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.16.0-jammy-slurm24.05.2"
-  ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.16.0-jammy-slurm24.05.2"
-  slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.16.0-jammy-slurm24.05.2"
-  exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.16.0-jammy-slurm24.05.2"
+  slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.16.1-jammy-slurm24.05.2"
+  slurmrestd: "cr.eu-north1.nebius.cloud/soperator/slurmrestd:1.16.1-jammy-slurm24.05.2"
+  slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.16.1-jammy-slurm24.05.2"
+  sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.16.1-jammy-slurm24.05.2"
+  munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.16.1-jammy-slurm24.05.2"
+  populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.16.1-jammy-slurm24.05.2"
+  ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.16.1-jammy-slurm24.05.2"
+  slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.16.1-jammy-slurm24.05.2"
+  exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.16.1-jammy-slurm24.05.2"
   mariaDB: "docker-registry1.mariadb.com/library/mariadb:11.4.3"
diff --git a/helm/soperator-crds/Chart.yaml b/helm/soperator-crds/Chart.yaml
index e2acc630..f12b6e5f 100644
--- a/helm/soperator-crds/Chart.yaml
+++ b/helm/soperator-crds/Chart.yaml
@@ -2,6 +2,6 @@ apiVersion: v2
 name: helm-soperator-crds
 description: A Helm chart for Kubernetes
 type: application
-version: 1.16.0
-appVersion: "1.16.0"
+version: 1.16.1
+appVersion: "1.16.1"
 kubeVersion: ">=1.29.0-0"
diff --git a/helm/soperator/Chart.yaml b/helm/soperator/Chart.yaml
index e3713fa5..ef2dcdce 100644
--- a/helm/soperator/Chart.yaml
+++ b/helm/soperator/Chart.yaml
@@ -2,6 +2,6 @@ apiVersion: v2
 name: helm-soperator
 description: A Helm chart for Kubernetes
 type: application
-version: 1.16.0
-appVersion: "1.16.0"
+version: 1.16.1
+appVersion: "1.16.1"
 kubeVersion: ">=1.29.0-0"
diff --git a/helm/soperator/values.yaml b/helm/soperator/values.yaml
index e7ebcbd2..f1edc57b 100644
--- a/helm/soperator/values.yaml
+++ b/helm/soperator/values.yaml
@@ -36,7 +36,7 @@ controllerManager:
     slurmOperatorWatchNamespaces: '*'
   image:
     repository: cr.eu-north1.nebius.cloud/soperator/slurm-operator
-    tag: 1.16.0
+    tag: 1.16.1
   imagePullPolicy: Always
   resources:
     limits:
diff --git a/internal/consts/version.go b/internal/consts/version.go
index 18b64cee..3c851466 100644
--- a/internal/consts/version.go
+++ b/internal/consts/version.go
@@ -2,5 +2,5 @@
 package consts
 
 const (
-	VersionCR = "1.16.0"
+	VersionCR = "1.16.1"
 )