From 83d87d3f6e078f7ab086ff5ec3506bca6478a44f Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Tue, 1 Oct 2024 09:40:19 -0400 Subject: [PATCH 1/6] feat(slurm_ops): add initial jwt key support Add initial support for managing jwt keys with the slurm_ops charm library. Currently relies on the cryptography library for generating an RSA key that is used to sign Slurm communications, but we'll eventually be made to rely on a dedicated `jwtctl` tool that can handle the key for us. Signed-off-by: Jason C. Nucciarone --- dev-requirements.txt | 1 + lib/charms/hpc_libs/v0/slurm_ops.py | 50 ++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 9284b31..f587bd1 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -3,6 +3,7 @@ slurmutils ~= 0.7.0 python-dotenv ~= 1.0.1 pyyaml >= 6.0.2 distro ~=1.9.0 +cryptography ~= 43.0.1 # tests deps coverage[toml] ~= 7.6 diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index 5f95481..61a1f62 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -73,6 +73,8 @@ def _on_install(self, _) -> None: import distro import dotenv import yaml +from cryptography.hazmat.primitives import serialization +from cryptography.hazmat.primitives.asymmetric import rsa from slurmutils.editors import cgroupconfig, slurmconfig, slurmdbdconfig from slurmutils.models import CgroupConfig, SlurmConfig, SlurmdbdConfig @@ -96,7 +98,13 @@ def _on_install(self, _) -> None: LIBPATCH = 7 # Charm library dependencies to fetch during `charmcraft pack`. -PYDEPS = ["pyyaml>=6.0.2", "python-dotenv~=1.0.1", "slurmutils~=0.7.0", "distro~=1.9.0"] +PYDEPS = [ + "cryptography~=43.0.1", + "pyyaml>=6.0.2", + "python-dotenv~=1.0.1", + "slurmutils~=0.7.0", + "distro~=1.9.0", +] _logger = logging.getLogger(__name__) @@ -549,6 +557,9 @@ def install(self) -> None: raise SlurmOpsError(f"failed to install {self._service_name}. reason: {e}") self._env_file.touch(exist_ok=True) + # Debian package postinst hook does not create a `StateSaveLocation` directory + # so we make one here that is only r/w by owner. + Path("/var/lib/slurm/slurm.state").mkdir(mode=0o600, exist_ok=True) if self._service_name == "slurmd": override = Path("/etc/systemd/system/slurmd.service.d/10-slurmd-conf-server.conf") @@ -584,6 +595,42 @@ def _env_manager_for(self, type: _ServiceType) -> _EnvManager: return _EnvManager(file=self._env_file, prefix=type.value) +class _JWTKeyManager: + """Control the JWT key used by Slurm. + + Todo: + Use `jwtctl` to provide backend for generating, setting, and getting + JWT key used by `slurmctld` and `slurmrestd`. This way we won't need to + pass the `snap` bool as an argument to `__init__` + """ + + def __init__(self, snap: bool = False): + self._keyfile_path = ( + Path("/var/snap/slurm/common/var/lib/slurm/slurm.state/jwt_hs256.key") + if snap + else Path("/var/lib/slurm/slurm.state/jwt_hs256.key") + ) + + def get(self) -> str: + """Get the current jwt key.""" + return self._keyfile_path.read_text() + + def set(self, key: str) -> None: + """Set a new jwt key.""" + self._keyfile_path.write_text(key) + + def generate(self) -> None: + """Generate a new, cryptographically secure jwt key.""" + key = rsa.generate_private_key(public_exponent=65537, key_size=2048) + self.set( + key.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.TraditionalOpenSSL, + encryption_algorithm=serialization.NoEncryption(), + ).decode() + ) + + class _MungeKeyManager: """Control the munge key via `mungectl ...` commands.""" @@ -633,6 +680,7 @@ def __init__(self, service: _ServiceType, snap: bool = False) -> None: self._ops_manager = _SnapManager() if snap else _AptManager(service) self.service = self._ops_manager.service_manager_for(service) self.munge = _MungeManager(self._ops_manager) + self.jwt = _JWTKeyManager(snap) self.exporter = _PrometheusExporterManager(self._ops_manager) self.install = self._ops_manager.install self.version = self._ops_manager.version From 2326c69d2c06842dfa81b79426e2de3ac0b4ac5f Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Tue, 1 Oct 2024 09:42:08 -0400 Subject: [PATCH 2/6] tests(slurm_ops): add unit tests for jwt key manager Signed-off-by: Jason C. Nucciarone --- tests/unit/test_slurm_ops.py | 45 ++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/unit/test_slurm_ops.py b/tests/unit/test_slurm_ops.py index 5861cdb..6d2fe5f 100644 --- a/tests/unit/test_slurm_ops.py +++ b/tests/unit/test_slurm_ops.py @@ -25,6 +25,34 @@ MUNGEKEY = b"1234567890" MUNGEKEY_BASE64 = base64.b64encode(MUNGEKEY) +JWT_KEY = """-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEAt3PLWkwUOeckDwyMpHgGqmOZhitC8KfOQY/zPWfo+up5RQXz +gVWqsTIt1RWynxIwCGeKYfVlhoKNDEDL1ZjYPcrrGBgMEC8ifqxkN4RC8bwwaGrJ +9Zf0kknPHI5AJ9Fkv6EjgAZW1lwV0uEE5kf0wmlgfThXfzwwGVHVwemE1EgUzdI/ +rVxFP5Oe+mRM7kWdtXQrfizGhfmr8laCs+dgExpPa37mk7u/3LZfNXXSWYiaNtie +vax5BxmI4bnTIXxdTT4VP9rMxG8nSspVj5NSWcplKUANlIkMKiO7k/CCD/YzRzM0 +0yZttiTvECG+rKy+KJd97dbtj6wSvbJ7cjfq2wIDAQABAoIBACNTfPkqZUqxI9Ry +CjMxmbb97vZTJlTJO4KMgb51X/vRYwDToIxrPq9YhlLeFsNi8TTtG0y5wI8iXJ7b +a2T6RcnAZX0CRHBpYy8Za0L1iR6bqoaw6asNU99Hr0ZEbj48qDXuhbOFhPtKSDmP +cy4U9SDqwdXbH540rN5zT8JDgXyPAVJpwgsShk7rhgOFGIPIZqQoxEjPV3jr1sbk +k7c39fJR6Kxywppn7flSmNX3v1LDu4NDIp0Llt1NlcKlbdy5XWEW9IbiIYi3JTpB +kMpkFQFIuUyledeFyVFPsP8O7Da2rZS6Fb1dYNWzh3WkDRiAwYgTspiYiSf4AAi4 +TgrOmiECgYEA312O5bXqXOapU+S2yAFRTa8wkZ1iRR2E66NypZKVsv/vfe0bO+WQ +kI6MRmTluvOKsKe3JulJZpjbl167gge45CHnFPZxEODAJN6OYp+Z4aOvTYBWQPpO +A75AGSheL66PWe4d+ZGvxYCZB5vf4THAs8BsGlFK04RKL1vHADkUjHUCgYEA0kFh +2ei/NP8ODrwygjrpjYSc2OSH9tBUoB7y5zIfLsXshb3Fn4pViF9vl01YkJJ57kki +KQm7rgqCsFnKS4oUFbjDDFbo351m1e3XRbPAATIiqtJmtLoLoSWuhXpsCbneM5bB +xLhFmm8RcFC6ORPBE2WMTGYzTEKydhImvUo+8A8CgYEAssWpyjaoRgSjP68Nj9Rm +Izv1LoZ9kX3H1eUyrEw/Hk3ze6EbK/xXkStWID0/FTs5JJyHXVBX3BK5plQ+1Rqj +I4vy7Hc2FWEcyCWMZmkA+3RLqUbvQgBUEnDh0oDZqWYX+802FnpA6V08nbdnH1D3 +v6Zhn0qzDcmSqobVJluJE8UCgYB93FO1/QSQtel1WqUlnhx28Z5um4bkcVtnKn+f +dDqEZkiq2qn1UfrXksGbIdrVWEmTIcZIKKJnkbUf2fAl/fb99ccUmOX4DiIkB6co ++2wBi0CDX0XKA+C4S3VIQ7tuqwvfd+xwVRqdUsVupXSEfFXExbIRfdBRY0+vLDhy +cYJxcwKBgQCK+dW+F0UJTQq1rDxfI0rt6yuRnhtSdAq2+HbXNx/0nwdLQg7SubWe +1QnLcdjnBNxg0m3a7S15nyO2xehvB3rhGeWSfOrHYKJNX7IUqluVLJ+lIwgE2eAz +94qOCvkFCP3pnm/MKN6/rezyOzrVJn7GbyDhcjElu+DD+WRLjfxiSw== +-----END RSA PRIVATE KEY----- +""" SLURM_INFO = """ name: slurm summary: "Slurm: A Highly Scalable Workload Manager" @@ -132,6 +160,9 @@ class SlurmOpsBase: def setUp(self): self.setUpPyfakefs() self.fs.create_file("/var/snap/slurm/common/.env") + self.fs.create_file( + "/var/snap/slurm/common/var/lib/slurm/slurm.state/jwt_hs256.key", contents=JWT_KEY + ) def test_config_name(self, *_) -> None: """Test that the config name is correctly set.""" @@ -203,6 +234,20 @@ def test_configure_munge(self, *_) -> None: self.manager.munge.max_thread_count = 24 self.assertEqual(self.manager.munge.max_thread_count, 24) + def test_get_jwt_key(self, *_) -> None: + """Test that the jwt key is properly retrieved.""" + self.assertEqual(self.manager.jwt.get(), JWT_KEY) + + def test_set_jwt_key(self, *_) -> None: + """Test that the jwt key is set correctly.""" + self.manager.jwt.set(JWT_KEY) + self.assertEqual(self.manager.jwt.get(), JWT_KEY) + + def test_generate_jwt_key(self, *_) -> None: + """Test that the jwt key is properly generated.""" + self.manager.jwt.generate() + self.assertNotEqual(self.manager.jwt.get(), JWT_KEY) + @patch("charms.hpc_libs.v0.slurm_ops.socket.gethostname") def test_hostname(self, gethostname, *_) -> None: """Test that manager is able to correctly get the host name.""" From 704777db8ab14ee47401c92ddf99eb73e2d5832a Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Tue, 1 Oct 2024 11:38:17 -0400 Subject: [PATCH 3/6] tests(slurm_ops): fix tests on py310 by explicitly mocking `_keyfile_path` Signed-off-by: Jason C. Nucciarone --- tests/unit/test_slurm_ops.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_slurm_ops.py b/tests/unit/test_slurm_ops.py index 6d2fe5f..c1a39f4 100644 --- a/tests/unit/test_slurm_ops.py +++ b/tests/unit/test_slurm_ops.py @@ -7,6 +7,7 @@ import base64 import subprocess import textwrap +from pathlib import Path from unittest import TestCase from unittest.mock import patch @@ -160,9 +161,11 @@ class SlurmOpsBase: def setUp(self): self.setUpPyfakefs() self.fs.create_file("/var/snap/slurm/common/.env") - self.fs.create_file( - "/var/snap/slurm/common/var/lib/slurm/slurm.state/jwt_hs256.key", contents=JWT_KEY + self.fs.create_file("/var/snap/slurm/common/var/lib/slurm/slurm.state/jwt_hs256.key") + self.manager.jwt._keyfile_path = Path( + "/var/snap/slurm/common/var/lib/slurm/slurm.state/jwt_hs256.key" ) + self.manager.jwt._keyfile_path.write_text(JWT_KEY) def test_config_name(self, *_) -> None: """Test that the config name is correctly set.""" From 0b6c1976323e4717e98a8f65b3990bf4d2fc1222 Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Tue, 1 Oct 2024 15:20:11 -0400 Subject: [PATCH 4/6] feat(slurm_ops): add `var_lib_path` property to get the variable state data directory Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 29 ++++++++++++++++++++--------- tests/unit/test_slurm_ops.py | 4 ++-- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index 61a1f62..415bee4 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -399,6 +399,11 @@ def version(self) -> str: def etc_path(self) -> Path: """Get the path to the Slurm configuration directory.""" + @property + @abstractmethod + def var_lib_path(self) -> Path: + """Get the path to the Slurm variable state data directory.""" + @abstractmethod def service_manager_for(self, type: _ServiceType) -> _ServiceManager: """Return the `ServiceManager` for the specified `ServiceType`.""" @@ -432,6 +437,11 @@ def etc_path(self) -> Path: """Get the path to the Slurm configuration directory.""" return Path("/var/snap/slurm/common/etc/slurm") + @property + def var_lib_path(self) -> Path: + """Get the path to the Slurm variable state data directory.""" + return Path("/var/snap/slurm/common/var/lib/slurm") + def service_manager_for(self, type: _ServiceType) -> _ServiceManager: """Return the `ServiceManager` for the specified `ServiceType`.""" return _SnapServiceManager(type) @@ -586,6 +596,11 @@ def etc_path(self) -> Path: """Get the path to the Slurm configuration directory.""" return Path("/etc/slurm") + @property + def var_lib_path(self) -> Path: + """Get the path to the Slurm variable state data directory.""" + return Path("/var/lib/slurm") + def service_manager_for(self, type: _ServiceType) -> _ServiceManager: """Return the `ServiceManager` for the specified `ServiceType`.""" return _SystemctlServiceManager(type) @@ -604,20 +619,16 @@ class _JWTKeyManager: pass the `snap` bool as an argument to `__init__` """ - def __init__(self, snap: bool = False): - self._keyfile_path = ( - Path("/var/snap/slurm/common/var/lib/slurm/slurm.state/jwt_hs256.key") - if snap - else Path("/var/lib/slurm/slurm.state/jwt_hs256.key") - ) + def __init__(self, keyfile: Path) -> None: + self._keyfile = keyfile def get(self) -> str: """Get the current jwt key.""" - return self._keyfile_path.read_text() + return self._keyfile.read_text() def set(self, key: str) -> None: """Set a new jwt key.""" - self._keyfile_path.write_text(key) + self._keyfile.write_text(key) def generate(self) -> None: """Generate a new, cryptographically secure jwt key.""" @@ -680,7 +691,7 @@ def __init__(self, service: _ServiceType, snap: bool = False) -> None: self._ops_manager = _SnapManager() if snap else _AptManager(service) self.service = self._ops_manager.service_manager_for(service) self.munge = _MungeManager(self._ops_manager) - self.jwt = _JWTKeyManager(snap) + self.jwt = _JWTKeyManager(self._ops_manager.var_lib_path / "slurm.data/jwt_hs256.key") self.exporter = _PrometheusExporterManager(self._ops_manager) self.install = self._ops_manager.install self.version = self._ops_manager.version diff --git a/tests/unit/test_slurm_ops.py b/tests/unit/test_slurm_ops.py index c1a39f4..fdee8fd 100644 --- a/tests/unit/test_slurm_ops.py +++ b/tests/unit/test_slurm_ops.py @@ -162,10 +162,10 @@ def setUp(self): self.setUpPyfakefs() self.fs.create_file("/var/snap/slurm/common/.env") self.fs.create_file("/var/snap/slurm/common/var/lib/slurm/slurm.state/jwt_hs256.key") - self.manager.jwt._keyfile_path = Path( + self.manager.jwt._keyfile = Path( "/var/snap/slurm/common/var/lib/slurm/slurm.state/jwt_hs256.key" ) - self.manager.jwt._keyfile_path.write_text(JWT_KEY) + self.manager.jwt._keyfile.write_text(JWT_KEY) def test_config_name(self, *_) -> None: """Test that the config name is correctly set.""" From 438276cc282a491cfaa438fac59910bf9febaba0 Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Tue, 1 Oct 2024 16:00:59 -0400 Subject: [PATCH 5/6] docs(slurm_ops): update todo comments to link to github issues Follows style recommended in the Google Python styleguide. Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index 415bee4..e08ef47 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -418,9 +418,13 @@ class _SnapManager(_OpsManager): def install(self) -> None: """Install Slurm using the `slurm` snap.""" - # FIXME: Pin slurm to the stable channel + # TODO: https://github.com/charmed-hpc/hpc-libs/issues/35 - + # Pin Slurm snap to stable channel. _snap("install", "slurm", "--channel", "latest/candidate", "--classic") - # FIXME: Request automatic alias for `mungectl` so that we don't need to do this manually + # TODO: https://github.com/charmed-hpc/slurm-snap/issues/49 - + # Request automatic alias for the Slurm snap so we don't need to do it here. + # We will possibly need to account for a third-party Slurm snap installation + # where aliasing is not automatically performed. _snap("alias", "slurm.mungectl", "mungectl") def version(self) -> str: @@ -610,14 +614,12 @@ def _env_manager_for(self, type: _ServiceType) -> _EnvManager: return _EnvManager(file=self._env_file, prefix=type.value) +# TODO: https://github.com/charmed-hpc/hpc-libs/issues/36 - +# Use `jwtctl` to provide backend for generating, setting, and getting +# jwt signing key used by `slurmctld` and `slurmdbd`. This way we also +# won't need to pass the keyfile path to the `__init__` constructor. class _JWTKeyManager: - """Control the JWT key used by Slurm. - - Todo: - Use `jwtctl` to provide backend for generating, setting, and getting - JWT key used by `slurmctld` and `slurmrestd`. This way we won't need to - pass the `snap` bool as an argument to `__init__` - """ + """Control the jwt signing key used by Slurm.""" def __init__(self, keyfile: Path) -> None: self._keyfile = keyfile From e4c7dba0a64a5bc0c60b255541662d9da7fb70f0 Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Tue, 1 Oct 2024 17:30:51 -0400 Subject: [PATCH 6/6] refactor(slurm_ops): pass _OpsManager rather than Path object to constructor Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index e08ef47..d52cbc1 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -621,8 +621,8 @@ def _env_manager_for(self, type: _ServiceType) -> _EnvManager: class _JWTKeyManager: """Control the jwt signing key used by Slurm.""" - def __init__(self, keyfile: Path) -> None: - self._keyfile = keyfile + def __init__(self, ops_manager: _OpsManager) -> None: + self._keyfile = ops_manager.var_lib_path / "slurm.state/jwt_hs256.key" def get(self) -> str: """Get the current jwt key.""" @@ -693,7 +693,7 @@ def __init__(self, service: _ServiceType, snap: bool = False) -> None: self._ops_manager = _SnapManager() if snap else _AptManager(service) self.service = self._ops_manager.service_manager_for(service) self.munge = _MungeManager(self._ops_manager) - self.jwt = _JWTKeyManager(self._ops_manager.var_lib_path / "slurm.data/jwt_hs256.key") + self.jwt = _JWTKeyManager(self._ops_manager) self.exporter = _PrometheusExporterManager(self._ops_manager) self.install = self._ops_manager.install self.version = self._ops_manager.version