Skip to content

Commit

Permalink
fix(jobbergate-agent-snap): Fixed problem config hook
Browse files Browse the repository at this point in the history
When the `snap set` command is used in quick succession (such as when
invoked through the `cloud-init.yaml` of the democluster), the command
can fail. This happens because the snap attempts to restart rapidly
when it fails to start. The start fails because the jobbergate-agent
is missing required config.

When the configure hook directs the snap to restart, it may happen while
the snap is in a restart cycle. Snap will detect that the restart
request happened too soon after a failed restart attempt and the command
will fail.

Setting the snap as disabled while the configure hook is running seems
to fix this issue.
  • Loading branch information
dusktreader committed Jan 31, 2025
1 parent 0948b69 commit d947f7c
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 17 deletions.
41 changes: 24 additions & 17 deletions jobbergate-agent-snap/hooks/bin/configure
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import subprocess
import sys
from contextlib import contextmanager
from pathlib import Path
from typing import Union

Expand All @@ -28,25 +29,30 @@ AGENT_VARIABLES_MAP: dict[str, Union[str, int]] = {
}


@contextmanager
def handle_error(message: str):
    """Exit the hook with a descriptive message if the wrapped block fails.

    Use as ``with handle_error("start the daemon"): ...``.

    Args:
        message: Short description of the action being attempted. It is
            interpolated after "Failed to " in the exit message.

    Raises:
        SystemExit: If the wrapped block raises any ``Exception``. The exit
            argument is a string, so Python prints it to stderr and exits
            with status 1.
    """
    try:
        yield
    except Exception as exc:
        # Only Exception subclasses are intercepted; SystemExit and
        # KeyboardInterrupt raised inside the block propagate unchanged.
        sys.exit(f"Failed to {message} (from configure hook) -- {exc}")


def run_bash(bash_string: str) -> str:
    """Run a command and return its standard output as a string.

    The command string is split on whitespace and executed directly (no
    shell is involved), so quoting and shell operators are not interpreted.

    Args:
        bash_string: The command line to run, with whitespace-separated
            arguments.

    Returns:
        The command's decoded stdout with trailing whitespace removed.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    return subprocess.check_output(bash_string.split()).decode().rstrip()


def daemon_starter():
    """Start the snap's daemon service.

    Runs ``snapctl start --enable`` for ``{SNAP_INSTANCE_NAME}.daemon``.
    Any failure terminates the hook through ``handle_error`` with a
    message naming the failed action.
    """
    with handle_error(f"start {SNAP_INSTANCE_NAME}.daemon"):
        run_bash(f"snapctl start --enable {SNAP_INSTANCE_NAME}.daemon")


def daemon_stopper():
    """Stop the snap's daemon service.

    Runs ``snapctl stop --disable`` for ``{SNAP_INSTANCE_NAME}.daemon``.
    Any failure terminates the hook through ``handle_error`` with a
    message naming the failed action.
    """
    with handle_error(f"stop {SNAP_INSTANCE_NAME}.daemon"):
        run_bash(f"snapctl stop --disable {SNAP_INSTANCE_NAME}.daemon")


def snapctl_get(snap_config_value: str) -> Union[str, None]:
Expand All @@ -65,15 +71,16 @@ def snapctl_get(snap_config_value: str) -> Union[str, None]:

def configure_dotenv_files():
    """Write the agent's .env file from snap config and built-in defaults.

    For every entry in ``AGENT_VARIABLES_MAP`` the matching snap option is
    looked up (lower-cased, underscores replaced by dashes). A value set via
    snap config overrides the default; entries with neither a snap value nor
    a truthy default are omitted. The result is written to
    ``DOTENV_FILE_LOCATION``, one ``{DOTENV_PREFIX}{NAME}={value}`` per line.

    Any failure terminates the hook through ``handle_error``.
    """
    with handle_error(f"configure .env for {SNAP_INSTANCE_NAME}.daemon"):
        env_file_content = ""
        for env_var, env_value in AGENT_VARIABLES_MAP.items():
            # Snap option names use dashes and lower case, e.g. FOO_BAR -> foo-bar.
            snapctl_value = snapctl_get(env_var.lower().replace("_", "-"))
            if snapctl_value is not None:
                env_value = snapctl_value
            elif bool(env_value) is False:
                # No snap value and no usable default: leave the variable out.
                continue
            env_file_content += f"{DOTENV_PREFIX}{env_var}={env_value}\n"
        DOTENV_FILE_LOCATION.write_text(env_file_content)


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions jobbergate-agent-snap/snap/snapcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ parts:
- jobbergate-agent
build-packages:
- python3
- python3-distutils
- libapt-pkg-dev
- gcc
- g++
Expand Down

0 comments on commit d947f7c

Please sign in to comment.