Skip to content

Commit

Permalink
feat(agent): check slurm version
Browse files Browse the repository at this point in the history
Check the Slurm version returned by slurmrestd against a hard-coded minimal
version and log an error if it is lower. This should help users running old
Slurm versions understand the situation through a clear error message
rather than weird and unexpected behaviors.
  • Loading branch information
rezib committed Aug 30, 2024
1 parent 3e7c525 commit c44206f
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Changed
- frontend: Add intermediate cluster list width to 80% on large screens, before
going down to 60% on even larger screens.
- agent: Check the Slurm version returned by `slurmrestd` against a hard-coded
minimal version and log an error if it is lower.
- pkgs: Add requirement on RFL.core and RFL.authentication >= 1.0.3.
- docs: Update configuration reference documentation.

Expand Down
23 changes: 21 additions & 2 deletions slurmweb/views/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from ..errors import SlurmwebCacheError, SlurmwebRestdError
from . import SlurmrestdUnixAdapter

# Tuple compared against the Slurm version retrieved from slurmrestd to check
# that the minimal supported version is satisfied.
MINIMAL_SLURM_VERSION = (23, 2, 0)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -137,7 +140,7 @@ def _cached_data(cache_key: str, expiration: int, func: Callable, *args: List[An
def _get_version():
return slurmrest(f"/slurm/v{current_app.settings.slurmrestd.version}/ping", "meta")[
"Slurm"
]["release"]
]


def _cached_version():
Expand Down Expand Up @@ -284,6 +287,22 @@ def _cached_accounts():
def stats():
total = 0
running = 0

version = _cached_version()
logger.info("Retrieved version %s", version)
# Check Slurm version is supported or fail with HTTP/500
if (
not (
version["version"]["major"],
version["version"]["minor"],
version["version"]["micro"],
)
>= MINIMAL_SLURM_VERSION
):
error = f"Unsupported Slurm version {version['release']}"
logger.error(error)
abort(500, error)

for job in _cached_jobs():
total += 1
if "RUNNING" in job["job_state"]:
Expand All @@ -296,7 +315,7 @@ def stats():
cores += node["cpus"]
return jsonify(
{
"version": _cached_version(),
"version": version["release"],
"resources": {"nodes": nodes, "cores": cores},
"jobs": {"running": running, "total": total},
}
Expand Down

0 comments on commit c44206f

Please sign in to comment.