diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
new file mode 100644
index 0000000..4ad255a
--- /dev/null
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,53 @@
+name: Release to Charmhub
+
+on:
+  workflow_dispatch:
+    inputs:
+      channel:
+        description: 'The channel where the charm is going to be released to'
+        required: true
+        default: 'stable'
+        type: choice
+        options:
+          - stable
+          - candidate
+          - beta
+          - edge
+
+jobs:
+  ci-tests:
+    uses: ./.github/workflows/test.yaml
+
+  release-to-charmhub:
+    name: Release to CharmHub
+    needs:
+      - ci-tests
+    runs-on: ubuntu-24.04
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # Full history and tags so `git describe` yields a real version.
+          fetch-depth: 0
+
+      - name: Install Charmcraft
+        run: |
+          sudo snap install charmcraft --channel 3.x/stable --classic
+
+      # The charm reads the `version` file in its install and upgrade-charm
+      # hooks, and the file is git-ignored, so generate it before packing.
+      - name: Generate version file
+        run: |
+          make version
+
+      - name: Build bundle
+        run: |
+          charmcraft pack
+          mv vantage-agent_*.charm vantage-agent.charm
+
+      - name: Create bundle revision
+        run: |
+          charmcraft upload --name=vantage-agent --release="${CHANNEL}" vantage-agent.charm
+        env:
+          CHANNEL: ${{ github.event.inputs.channel }}
+          CHARMCRAFT_AUTH: ${{ secrets.CHARMCRAFT_AUTH }}
\ No newline at end of file
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
new file mode 100644
index 0000000..d620ab7
--- /dev/null
+++ b/.github/workflows/test.yaml
@@ -0,0 +1,21 @@
+name: Test
+
+on:
+  pull_request:
+  workflow_call:
+
+jobs:
+  lint-charms:
+    name: "Lint the charms"
+    runs-on: "ubuntu-latest"
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          architecture: 'x64'
+
+      - name: "Install tox, run 'make lint'"
+        run: |
+          pip install tox==4.18.0
+          tox -e lint
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a09b89e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+*/.mypy_cache/
+build/
+*.charm
+vantage-agent.yaml
+vantage-manager-agent.yaml
+version
+.venv
+.tox
+.ruff_cache
\ No newline at end of file
diff --git a/.jujuignore b/.jujuignore
new file mode 100644
index 0000000..fa4f95f
--- /dev/null
+++ b/.jujuignore
@@ -0,0 +1,7 @@
+/.pytest_cache
+/build
+/.tox
+__pycache__
+
+/vantage-agent
+/.vscode
\ No newline at end of file
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..871f80a
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.12.3
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
new file mode 100644
index 0000000..e1c9975
--- /dev/null
+++ b/CHANGELOG.rst
@@ -0,0 +1,12 @@
+==========
+Change Log
+==========
+
+This file keeps track of all notable changes to the Vantage Agent Charm.
+
+Unreleased
+----------
+
+1.0.0 - 2023-03-07
+------------------
+* Create the project
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5042441
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 OmniVector Corp
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..52155fc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,24 @@
+.PHONY: help
+help:
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
+
+.PHONY: lint
+lint: ## Run linter
+	tox -e lint
+
+# Phony: regenerate on every run; otherwise an existing file is never updated.
+.PHONY: version
+version: ## Create/update version file
+	@git describe --tags --dirty --always > version
+
+.PHONY: clean
+clean: ## Remove build dirs, temp files, and charms
+	rm -rf .venv/
+	rm -rf build
+	rm -rf version
+	find . -name "*.charm" -delete
+
+.PHONY: charm
+charm: version ## Pack the charm
+	@charmcraft pack
+	@mv vantage-agent_*.charm vantage-agent.charm
\ No newline at end of file
diff --git a/README.md b/README.md
index dd4925a..09f5e71 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,68 @@
 # charm-vantage-agent
-Charm for the Vantage Agent
+
+## Usage
+
+Follow the steps below to get started.
+
+### Build the charm
+
+Running the following command will produce a .charm file, `vantage-agent.charm`:
+
+```bash
+make charm
+```
+
+### Create the vantage-agent charm config
+
+`vantage-agent.yaml`
+
+```yaml
+vantage-agent:
+  base-api-url: ""
+  oidc-domain: ""
+  oidc-client-id: ""
+  oidc-client-secret: ""
+```
+
+e.g.
+
+```yaml
+vantage-agent:
+  base-api-url: "https://apis.vantagehpc.io/cluster"
+  oidc-domain: "auth.vantagehpc.io"
+  oidc-client-id: "<your-oidc-client-id>"
+  oidc-client-secret: "<your-oidc-client-secret>"
+```
+
+### Deploy the charm
+
+Using the built charm and the defined config, run the command to deploy the charm.
+
+```bash
+juju deploy ./vantage-agent.charm \
+    --config ./vantage-agent.yaml \
+    --series jammy
+```
+
+### Charm Actions
+
+The vantage-agent charm exposes additional functionality to facilitate vantage-agent
+package upgrades.
+
+To upgrade the vantage-agent to a new version or release:
+
+```bash
+juju run-action vantage-agent/leader upgrade-vtg version="7.7.7"
+```
+
+This will result in the vantage-agent package being upgraded to 7.7.7.
+
+To manually clear vantage-agent's cache dir:
+
+```bash
+juju run-action vantage-agent/leader clear-cache-dir
+```
+
+#### License
+
+* MIT (see `LICENSE` file in this directory for full preamble)
\ No newline at end of file
diff --git a/actions.yaml b/actions.yaml
new file mode 100644
index 0000000..b557c26
--- /dev/null
+++ b/actions.yaml
@@ -0,0 +1,13 @@
+upgrade-vtg:
+  description: >
+    Upgrade vantage agent.
+  params:
+    version:
+      type: string
+      description: Version of vantage agent to upgrade to.
+  required:
+  - version
+
+clear-cache-dir:
+  description: >
+    Clear the vantage agent cache directory and restart the agent.
diff --git a/charmcraft.yaml b/charmcraft.yaml
new file mode 100644
index 0000000..4b5aa05
--- /dev/null
+++ b/charmcraft.yaml
@@ -0,0 +1,15 @@
+type: charm
+bases:
+- build-on:
+  - name: ubuntu
+    channel: "22.04"
+  run-on:
+  - name: ubuntu
+    channel: "20.04"
+    architectures: [amd64]
+  - name: ubuntu
+    channel: "22.04"
+    architectures: [amd64]
+parts:
+  charm:
+    charm-python-packages: [setuptools]
\ No newline at end of file
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..1e33dac
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,49 @@
+options:
+  # Resource URL
+  base-api-url:
+    type: string
+    default: ""
+    description: |
+      The Vantage API URL
+
+  # Slurm related settings
+  scontrol-path:
+    type: string
+    default: "/usr/bin/scontrol"
+    description: |
+      Absolute path to the scontrol command
+
+  # Auth related settings
+  oidc-domain:
+    type: string
+    default: ""
+    description: |
+      Domain for the OIDC provider API where auth tokens will be fetched
+  oidc-client-id:
+    type: string
+    default: ""
+    description: |
+      The ID for the OIDC provider app client to which tokens will be issued
+  oidc-client-secret:
+    type: string
+    default: ""
+    description: |
+      The secret key for the OIDC provider app client to which tokens will be issued
+
+  task-jobs-interval-seconds:
+    type: int
+    default: 30
+    description: |
+      The interval in seconds at which the agent will run internal jobs
+  task-self-update-interval-seconds:
+    type: int
+    default: 30
+    description: |
+      The interval in seconds at which the agent will check for version updates
+
+  # Other settings
+  cache-dir:
+    type: string
+    default: "/var/cache/vantage-agent"
+    description: |
+      Absolute path to the cache directory
diff --git a/dispatch b/dispatch
new file mode 100755
index 0000000..1749a71
--- /dev/null
+++ b/dispatch
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Entry point for every hook and action. On the first run (no `.installed`
+# marker) it builds Python from source unless $PYTHON_BIN already exists,
+# then runs src/charm.py with that interpreter.
+
+set -e
+
+# Source the os-release information into the env
+. /etc/os-release
+
+PYTHON_BIN=/opt/python/python3.12/bin/python3.12
+
+if ! [[ -f '.installed' ]]
+then
+    if [[ ! -e "$PYTHON_BIN" ]]
+    then
+        if [[ "$ID" == 'rocky' ]]
+        then
+            # Install dependencies to build custom python
+            yum -y install epel-release
+            yum -y install wget gcc make tar bzip2-devel zlib-devel xz-devel openssl-devel libffi-devel sqlite-devel ncurses-devel xz-devel gdbm tk-devel readline-devel sqlite-devel libnsl2-devel
+
+            # Install yaml deps
+            yum -y install libyaml-devel
+        fi
+
+        if [[ "$ID" == 'ubuntu' ]]
+        then
+            # Install dependencies to build custom python (refresh the package lists first)
+            apt-get update
+            DEBIAN_FRONTEND=noninteractive apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
+        fi
+
+        export PYTHON_VERSION=3.12.1
+        wget "https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz" -P /tmp
+        tar xvf "/tmp/Python-${PYTHON_VERSION}.tar.xz" -C /tmp
+        cd "/tmp/Python-${PYTHON_VERSION}"
+        ./configure --prefix=/opt/python/python3.12 --enable-optimizations
+        make -C "/tmp/Python-${PYTHON_VERSION}" -j "$(nproc)" altinstall
+        cd "$OLDPWD"
+        rm -rf /tmp/Python*
+    fi
+    touch .installed
+fi
+
+JUJU_DISPATCH_PATH="${JUJU_DISPATCH_PATH:-$0}" PYTHONPATH=lib:venv "$PYTHON_BIN" ./src/charm.py
\ No newline at end of file
diff --git a/metadata.yaml b/metadata.yaml
new file mode 100644
index 0000000..0353438
--- /dev/null
+++ b/metadata.yaml
@@ -0,0 +1,28 @@
+name: vantage-agent
+display-name: Vantage Agent
+summary: Vantage Agent package lifecycle encapsulated in a charm
+maintainers:
+- Omnivector Corp
+description: |
+  This charm provides the Vantage Agent component of vantage.
+  It is a wrapper around the vantage-agent package, which is the core agent component of Vantage alongside the Jobbergate Agent.
+tags:
+- vantage
+- hpc
+
+subordinate: true
+
+series:
+- centos7
+- centos8
+- focal
+- jammy
+
+requires:
+  juju-info:
+    interface: juju-info
+    scope: container
+  prolog-epilog:
+    interface: prolog-epilog
+  fluentbit:
+    interface: fluentbit
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..071b614
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+ops==1.3.0
\ No newline at end of file
diff --git a/src/charm.py b/src/charm.py
new file mode 100644
index 0000000..bcb5c7f
--- /dev/null
+++ b/src/charm.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+"""VantageAgentCharm."""
+
+import logging
+from pathlib import Path
+
+from ops.charm import CharmBase
+from ops.framework import StoredState
+from ops.main import main
+from ops.model import ActiveStatus, BlockedStatus, WaitingStatus
+
+from vantage_agents_ops import VantageAgentOps
+
+
+logger = logging.getLogger()
+
+# Create a sentinel value. For more information about sentinel values and their
+# purpose, see the PEP (with some excellent examples and rationale) here:
+# https://peps.python.org/pep-0661/
+unset = object()
+
+
+class VantageAgentCharm(CharmBase):
+    """Facilitate vantage-agent lifecycle."""
+
+    stored = StoredState()
+
+    def __init__(self, *args):
+        """Initialize and observe."""
+        super().__init__(*args)
+
+        self.stored.set_default(installed=False)
+        self.stored.set_default(config_available=False)
+        self.stored.set_default(user_created=False)
+
+        self.vantage_agent_ops = VantageAgentOps(self)
+
+        event_handler_bindings = {
+            self.on.install: self._on_install,
+            self.on.upgrade_charm: self._on_upgrade,
+            self.on.start: self._on_start,
+            self.on.config_changed: self._on_config_changed,
+            self.on.remove: self._on_remove,
+            self.on.upgrade_vtg_action: self._on_upgrade_action,
+            self.on.clear_cache_dir_action: self._on_clear_cache_dir_action,
+        }
+        for event, handler in event_handler_bindings.items():
+            self.framework.observe(event, handler)
+
+    def _on_install(self, event):
+        """Install vantage-agent."""
+        self.unit.set_workload_version(Path("version").read_text().strip())
+
+        try:
+            self.vantage_agent_ops.install()
+            self.stored.installed = True
+        except Exception as e:
+            logger.error(f"## Error installing agent: {e}")
+            self.stored.installed = False
+            self.unit.status = BlockedStatus("Error installing vantage-agent")
+            event.defer()
+            return
+        # Log and set status
+        logger.debug("vantage-agent installed")
+        self.unit.status = WaitingStatus("vantage-agent installed")
+
+    def _on_upgrade(self, event):
+        """Perform upgrade operations."""
+        self.unit.set_workload_version(Path("version").read_text().strip())
+
+    def _on_start(self, event):
+        """
+        Start vantage-agent.
+
+        Start the agent once the charm config has been applied (see
+        ``_on_config_changed``); until then, defer the event.
+        """
+        if not self.stored.config_available:
+            event.defer()
+            return
+
+        logger.info("## Starting Vantage agent")
+        self.vantage_agent_ops.start_agent()
+        self.unit.status = ActiveStatus("vantage agent started")
+
+    def _on_config_changed(self, event):
+        """
+        Handle changes to the charm config.
+
+        If all the needed settings are available in the charm config, create the
+        environment settings for the charmed app. Also, store the config values in the
+        stored state for the charm.
+
+        Note the use of the sentinel ``unset`` value here. This allows us to
+        distinguish between *unset* values and values that were *explicitly* set.
+        A required setting that is unset or still an empty string blocks the unit.
+        For more information about sentinel values, see
+        `PEP 661 <https://peps.python.org/pep-0661/>`_.
+        """
+        if not self.stored.installed:
+            # The env file is written inside the virtualenv created by the install hook.
+            event.defer()
+            return
+
+        settings_to_map = {
+            "base-api-url": True,
+            "scontrol-path": False,
+            "oidc-domain": True,
+            "oidc-client-id": True,
+            "oidc-client-secret": True,
+            "task-jobs-interval-seconds": False,
+            "task-self-update-interval-seconds": False,
+            "cache-dir": False,
+        }
+
+        env_context = dict()
+
+        for setting, is_required in settings_to_map.items():
+            value = self.model.config.get(setting, unset)
+
+            if is_required and (value is unset or value == ""):
+                # Required but missing (the config.yaml default is an empty string).
+                # No defer: config-changed fires again once the operator sets it.
+                logger.warning(f"## Missing required config: {setting}")
+                self.unit.status = BlockedStatus(f"Missing required config: {setting}")
+                return
+            if value is unset:
+                # Not set, not required, just continue
+                continue
+
+            env_context[setting] = value
+
+            mapped_key = setting.replace("-", "_")
+            store_value = getattr(self.stored, mapped_key, unset)
+            if store_value != value:
+                setattr(self.stored, mapped_key, value)
+
+        # Write the env file once, after the whole context has been collected.
+        self.vantage_agent_ops.configure_env_defaults(env_context)
+        self.stored.config_available = True
+
+        logger.info("## Restarting the agent")
+        self.vantage_agent_ops.restart_agent()
+        self.unit.status = ActiveStatus("Vantage agent restarted")
+
+    def _on_remove(self, event):
+        """Remove directories and files created by vantage-agent charm."""
+        self.vantage_agent_ops.remove()
+
+    def _on_upgrade_action(self, event):
+        """Upgrade vantage-agent to the requested version and restart it."""
+        version = event.params["version"]
+        try:
+            self.vantage_agent_ops.upgrade(version)
+            event.set_results({"upgrade": "success"})
+            self.unit.status = ActiveStatus(f"Updated to version {version}")
+            self.vantage_agent_ops.restart_agent()
+        except Exception as e:
+            logger.error(f"## Error updating to version {version}: {e}")
+            self.unit.status = BlockedStatus(f"Error updating to version {version}")
+            event.fail(f"Error updating to version {version}: {e}")
+
+    def _on_clear_cache_dir_action(self, event):
+        """Clear the vantage-agent cache dir and restart the agent."""
+        try:
+            result = self.vantage_agent_ops.clear_cache_dir()
+            event.set_results({"cache-clear": "success"})
+            self.unit.status = ActiveStatus(result)
+            self.vantage_agent_ops.restart_agent()
+        except Exception as e:
+            logger.error(f"## Error clearing cache: {e}")
+            self.unit.status = BlockedStatus("Error clearing cache")
+            event.fail(f"Error clearing cache: {e}")
+
+
+if __name__ == "__main__":
+    main(VantageAgentCharm)
diff --git a/src/templates/vantage-agent.service b/src/templates/vantage-agent.service
new file mode 100644
index 0000000..28824f2
--- /dev/null
+++ b/src/templates/vantage-agent.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=vantage-agent
+After=network.target
+
+[Service]
+Type=simple
+User=root
+Group=root
+WorkingDirectory=/srv/vantage-agent-venv
+ExecStart=/srv/vantage-agent-venv/bin/vtg-run
+
+[Install]
+Alias=vantage-agent.service
+WantedBy=multi-user.target
\ No newline at end of file
diff --git a/src/vantage_agents_ops.py b/src/vantage_agents_ops.py
new file mode 100644
index 0000000..dd3e2cc
--- /dev/null
+++ b/src/vantage_agents_ops.py
@@ -0,0 +1,209 @@
+"""
+VantageAgentOps.
+"""
+
+import logging
+import subprocess
+from pathlib import Path
+from shutil import copy2, rmtree
+from typing import Any, Dict, Optional
+
+logger = logging.getLogger()
+
+
+class VantageAgentOps:
+    """Track and perform vantage-agent ops."""
+
+    _PACKAGE_NAME = "vantage-agent"
+    _SYSTEMD_SERVICE_NAME = "vantage-agent"
+    _SYSTEMD_BASE_PATH = Path("/usr/lib/systemd/system")
+    _SYSTEMD_SERVICE_ALIAS = f"{_PACKAGE_NAME}.service"
+    _SYSTEMD_SERVICE_FILE = _SYSTEMD_BASE_PATH / _SYSTEMD_SERVICE_ALIAS
+    _VENV_DIR = Path("/srv/vantage-agent-venv")
+    _ENV_DEFAULTS = _VENV_DIR / ".env"
+    _PIP_CMD = _VENV_DIR.joinpath("bin", "pip").as_posix()
+    _PYTHON_CMD = Path("/opt/python/python3.12/bin/python3.12")
+    _CACHE_DIR = Path("/var/cache/vantage-agent")
+
+    def __init__(self, charm):
+        """Initialize vantage-agent-ops."""
+        self._charm = charm
+
+    def install(self):
+        """Install vantage-agent and setup ops."""
+        # Create the virtualenv and ensure pip is up to date.
+        self._create_venv_and_ensure_latest_pip()
+        # Install additional dependencies.
+        self._install_extra_deps()
+        # Install vantage-agent
+        self._install_vantage_agent()
+        # Clear cached data
+        self.clear_cache_dir()
+        # Provision the vantage-agent systemd service.
+        self._setup_systemd()
+
+    def upgrade(self, version: str):
+        """Upgrade the vantage-agent python package."""
+        # Clear cached data
+        self.clear_cache_dir()
+        self._upgrade_vantage_agent(version)
+
+    def get_version_info(self):
+        """Show version and info about vantage-agent."""
+        cmd = [self._PIP_CMD, "show", self._PACKAGE_NAME]
+
+        out = subprocess.check_output(cmd, env={}).decode().strip()
+
+        return out
+
+    def configure_env_defaults(
+        self, config_context: Dict[str, Any], header: Optional[str] = None
+    ):
+        """
+        Map charm configs found in the config_context to app settings.
+
+        Map the settings found in the charm's config.yaml to the expected
+        settings for the application (including the prefix). Write all settings to the
+        configured dot-env file. If the file exists, it is replaced.
+        """
+        prefix = "VANTAGE_AGENT_"
+        with open(self._ENV_DEFAULTS, "w") as env_file:
+            if header:
+                print(header, file=env_file)
+            for key, value in config_context.items():
+                mapped_key = key.replace("-", "_").upper()
+                print(f"{prefix}{mapped_key}={value}", file=env_file)
+
+            # NOTE(review): written unconditionally, so a "cache-dir" key in config_context
+            # yields a second CACHE_DIR line -- confirm which one the agent honours.
+            print(f"{prefix}CACHE_DIR={self._CACHE_DIR}", file=env_file)
+
+        # Clear cached data
+        self.clear_cache_dir()
+
+    def systemctl(self, operation: str):
+        """
+        Run systemctl operation for the service.
+
+        Best effort: a non-zero exit status is logged, not raised.
+        """
+        cmd = [
+            "systemctl",
+            operation,
+            self._SYSTEMD_SERVICE_NAME,
+        ]
+        # subprocess.call never raises CalledProcessError; inspect the exit status.
+        returncode = subprocess.call(cmd)
+        if returncode != 0:
+            logger.error(f"Error running {' '.join(cmd)} - exit status {returncode}")
+
+    def remove(self):
+        """
+        Remove the things we have created.
+        """
+        # Stop and disable the systemd service.
+        self.systemctl("stop")
+        self.systemctl("disable")
+        # Remove files and dirs created by this charm.
+        if self._SYSTEMD_SERVICE_FILE.exists():
+            self._SYSTEMD_SERVICE_FILE.unlink()
+        subprocess.call(["systemctl", "daemon-reload"])
+        # The virtualenv may be missing if install never completed.
+        rmtree(self._VENV_DIR.as_posix(), ignore_errors=True)
+
+    def _run_checked(self, cmd):
+        """Run cmd with an empty environment; log and re-raise if it exits non-zero."""
+        try:
+            subprocess.check_call(cmd, env=dict())
+        except subprocess.CalledProcessError as e:
+            logger.error(f"Error running {' '.join(map(str, cmd))} - {e}")
+            raise
+
+    def _create_venv_and_ensure_latest_pip(self):
+        """Create the virtualenv and upgrade pip."""
+
+        # Create the virtualenv
+        create_venv_cmd = [
+            self._PYTHON_CMD,
+            "-m",
+            "venv",
+            self._VENV_DIR.as_posix(),
+        ]
+        logger.debug(f"## Creating virtualenv: {create_venv_cmd}")
+        self._run_checked(create_venv_cmd)
+        logger.debug("## vantage-agent virtualenv created")
+
+        # Ensure we have the latest pip
+        upgrade_pip_cmd = [
+            self._PIP_CMD,
+            "install",
+            "--upgrade",
+            "pip",
+        ]
+        logger.debug(f"## Updating pip: {upgrade_pip_cmd}")
+        self._run_checked(upgrade_pip_cmd)
+        logger.debug("## Pip upgraded")
+
+    def _setup_systemd(self):
+        """Provision the vantage-agent systemd service."""
+        copy2(
+            "./src/templates/vantage-agent.service",
+            self._SYSTEMD_SERVICE_FILE.as_posix(),
+        )
+
+        subprocess.call(["systemctl", "daemon-reload"])
+        subprocess.call(["systemctl", "enable", "--now", self._SYSTEMD_SERVICE_ALIAS])
+
+    def _install_extra_deps(self):
+        """Install additional dependencies."""
+        # Install pyyaml
+        cmd = [self._PIP_CMD, "install", "pyyaml"]
+        logger.debug(f"## Installing extra dependencies: {cmd}")
+        self._run_checked(cmd)
+
+    def _install_vantage_agent(self):
+        """Install the vantage-agent package."""
+        cmd = [
+            self._PIP_CMD,
+            "install",
+            "-U",
+            self._PACKAGE_NAME,
+        ]
+        logger.debug(f"## Installing the agent: {cmd}")
+        self._run_checked(cmd)
+
+    def _upgrade_vantage_agent(self, version: str):
+        """Upgrade the vantage-agent python package."""
+        cmd = [
+            self._PIP_CMD,
+            "install",
+            "-U",
+            f"{self._PACKAGE_NAME}=={version}",
+        ]
+
+        self._run_checked(cmd)
+
+    def clear_cache_dir(self) -> str:
+        """Clear the cache dir. vantage-agent will recreate it on the next run."""
+
+        if self._CACHE_DIR.exists():
+            logger.debug(f"Clearing cache dir {self._CACHE_DIR.as_posix()}")
+            rmtree(self._CACHE_DIR, ignore_errors=True)
+            return "Cache cleared"
+        else:
+            logger.debug(
+                f"Cache dir {self._CACHE_DIR.as_posix()} doesn't exist. Skipping."
+            )
+            return "Cache dir doesn't exist. Skipping."
+
+    def start_agent(self):
+        """Start the vantage-agent systemd service."""
+        self.systemctl("start")
+
+    def stop_agent(self):
+        """Stop the vantage-agent systemd service."""
+        self.systemctl("stop")
+
+    def restart_agent(self):
+        """Restart the vantage-agent systemd service."""
+        self.systemctl("restart")
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..1c90422
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,28 @@
+[tox]
+skipsdist = True
+envlist = format, lint
+
+
+[testenv]
+basepython = python3
+
+
+[testenv:lint]
+commands = ruff check {posargs} src/
+deps =
+    ruff
+
+[testenv:format]
+commands = ruff format {posargs} src/
+deps =
+    ruff
+
+# NOTE(review): ruff looks for pyproject.toml, ruff.toml or .ruff.toml -- confirm the sections below take effect.
+[ruff]
+line-length = 110
+extend-exclude = ["__pycache__", "*.egg_info", "__init__.py"]
+
+[ruff.lint]
+select = ["E", "W", "F", "C", "N", "D", "I001", "I"]
+ignore = ["D213", "D211", "D203", "C408"]
+fixable = ["ALL"]