From c44206fe0471702f16cd680f8a96002b38912e6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Fri, 30 Aug 2024 16:56:42 +0200 Subject: [PATCH] feat(agent): check slurm version Check Slurm version as returned by slurmrestd against hard-coded minimal version and log error if not greater or equal. This should help users to understand the status with old Slurm versions with a clear error message rather than weird and unexpected behaviors. --- CHANGELOG.md | 2 ++ slurmweb/views/agent.py | 23 +++++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ad9c318..95995802 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Changed - frontend: Add intermediate cluster list width to 80% on large screens, before going down to 60% on even larger screens. +- agent: Check Slurm version returned from `slurmrestd` against hard-coded + minimal version and log error if not greater or equal. - pkgs: Add requirement on RFL.core and RFL.authentication >= 1.0.3. - docs: Update configuration reference documentation. diff --git a/slurmweb/views/agent.py b/slurmweb/views/agent.py index 419fc88e..4145ef4a 100644 --- a/slurmweb/views/agent.py +++ b/slurmweb/views/agent.py @@ -15,6 +15,9 @@ from ..errors import SlurmwebCacheError, SlurmwebRestdError from . import SlurmrestdUnixAdapter +# Tuple used for comparison with Slurm version retrieved from slurmrestd and +# check for minimal supported version. 
+MINIMAL_SLURM_VERSION = (23, 2, 0) logger = logging.getLogger(__name__) @@ -137,7 +140,7 @@ def _cached_data(cache_key: str, expiration: int, func: Callable, *args: List[An def _get_version(): return slurmrest(f"/slurm/v{current_app.settings.slurmrestd.version}/ping", "meta")[ "Slurm" - ]["release"] + ] def _cached_version(): @@ -284,6 +287,22 @@ def _cached_accounts(): def stats(): total = 0 running = 0 + + version = _cached_version() + logger.info("Retrieved version %s", version) + # Check Slurm version is supported or fail with HTTP/500 + if ( + not ( + version["version"]["major"], + version["version"]["minor"], + version["version"]["micro"], + ) + >= MINIMAL_SLURM_VERSION + ): + error = f"Unsupported Slurm version {version['release']}" + logger.error(error) + abort(500, error) + for job in _cached_jobs(): total += 1 if "RUNNING" in job["job_state"]: @@ -296,7 +315,7 @@ def stats(): cores += node["cpus"] return jsonify( { - "version": _cached_version(), + "version": version["release"], "resources": {"nodes": nodes, "cores": cores}, "jobs": {"running": running, "total": total}, }