From 0eb037b4c6fb298be7345b170d2b68b8e403c223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Mon, 4 Nov 2024 15:52:23 +0100 Subject: [PATCH 1/3] feat(conf): get alloc[_idle]_cpus from slurmrestd Select alloc_cpus and alloc_idle_cpus nodes fields on slurmrestd /slurm/*/nodes and /slurm/*/node/ endpoints. These fields are especially useful to count exactly the number of idle/allocated cores on mixed nodes. --- CHANGELOG.md | 2 ++ conf/vendor/agent.yml | 3 +++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e4755e2..fb7dfec5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `metrics` > `restrict` parameter for the agent. - Add `ui` > `templates`, `message_template`, `message_login` parameters for the gateway. + - Select `alloc_cpus` and `alloc_idle_cpus` nodes fields on `slurmrestd` + `/slurm/*/nodes` and `/slurm/*/node/` endpoints. - Introduce service message template. - show-conf: Introduce `slurm-web-show-conf` utility to dump current configuration settings of gateway and agent components with their origin, diff --git a/conf/vendor/agent.yml b/conf/vendor/agent.yml index a3d55ffc..72ac3ab0 100644 --- a/conf/vendor/agent.yml +++ b/conf/vendor/agent.yml @@ -148,6 +148,8 @@ filters: - state - reason - partitions + - alloc_cpus + - alloc_idle_cpus doc: | List of nodes fields selected in slurmrestd API, all other fields are filtered out. @@ -169,6 +171,7 @@ filters: - reason - partitions - alloc_cpus + - alloc_idle_cpus - alloc_memory doc: | List of invidual node fields selected in slurmrestd API, all other fields From 84c6f23ba43e41f535d06ec8e7273aab305ee59f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Mon, 4 Nov 2024 15:47:50 +0100 Subject: [PATCH 2/3] fix(agent): metrics exact allocated cores Report exact number of allocated/idle cores instead of considering all cores mixed on partially allocated nodes. --- slurmweb/slurmrestd/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/slurmweb/slurmrestd/__init__.py b/slurmweb/slurmrestd/__init__.py index d6b37b71..07f0a948 100644 --- a/slurmweb/slurmrestd/__init__.py +++ b/slurmweb/slurmrestd/__init__.py @@ -145,7 +145,6 @@ def nodes_cores_states(self): } cores_states = { "idle": 0, - "mixed": 0, "allocated": 0, "down": 0, "drain": 0, @@ -157,7 +156,9 @@ def nodes_cores_states(self): cores = node["cpus"] if "MIXED" in node["state"]: nodes_states["mixed"] += 1 - cores_states["mixed"] += cores + # Look at number of actually allocated/idle cores + cores_states["allocated"] += node["alloc_cpus"] + cores_states["idle"] += node["alloc_idle_cpus"] elif "ALLOCATED" in node["state"]: nodes_states["allocated"] += 1 cores_states["allocated"] += cores From 937d374cd6d6200421b1e387a886f1295ad5b1b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Mon, 4 Nov 2024 15:54:24 +0100 Subject: [PATCH 3/3] docs: update conf references --- docs/modules/conf/examples/agent.ini | 6 ++++++ docs/modules/conf/partials/conf-agent.adoc | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/docs/modules/conf/examples/agent.ini b/docs/modules/conf/examples/agent.ini index b43d361f..4fcfbcc0 100644 --- a/docs/modules/conf/examples/agent.ini +++ b/docs/modules/conf/examples/agent.ini @@ -208,6 +208,8 @@ ctldjob= # - state # - reason # - partitions +# - alloc_cpus +# - alloc_idle_cpus nodes= name cpus @@ -217,6 +219,8 @@ nodes= state reason partitions + alloc_cpus + alloc_idle_cpus # List of invidual node fields selected in slurmrestd API, all other fields # are filtered out. @@ -236,6 +240,7 @@ nodes= # - reason # - partitions # - alloc_cpus +# - alloc_idle_cpus # - alloc_memory node= name @@ -252,6 +257,7 @@ node= reason partitions alloc_cpus + alloc_idle_cpus alloc_memory # List of partitions fields selected in slurmrestd API, all other fields are diff --git a/docs/modules/conf/partials/conf-agent.adoc b/docs/modules/conf/partials/conf-agent.adoc index d6c9c686..0e045ad0 100644 --- a/docs/modules/conf/partials/conf-agent.adoc +++ b/docs/modules/conf/partials/conf-agent.adoc @@ -340,6 +340,10 @@ filtered out. * `partitions` +* `alloc_cpus` + +* `alloc_idle_cpus` + |- @@ -383,6 +387,8 @@ are filtered out. * `alloc_cpus` +* `alloc_idle_cpus` + * `alloc_memory`