From 010c26315c543db637a1276f64adb715a5195d1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Nowacki?= Date: Tue, 28 Jan 2025 00:38:33 +0100 Subject: [PATCH 1/5] have executors cleanup past lingering containers --- .../executor/management/commands/run_executor.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py b/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py index ee8270536..b71160076 100644 --- a/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py +++ b/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py @@ -473,9 +473,9 @@ def __init__(self, initial_job_request: V0InitialJobRequest | V1InitialJobReques self.specs_volume_mount_dir = self.temp_dir / "specs" self.download_manager = DownloadManager() - self.job_container_name = f"{settings.EXECUTOR_TOKEN}-job" - self.nginx_container_name = f"{settings.EXECUTOR_TOKEN}-nginx" - self.job_network_name = f"{settings.EXECUTOR_TOKEN}-network" + self.job_container_name = f"ch-{settings.EXECUTOR_TOKEN}-job" + self.nginx_container_name = f"ch-{settings.EXECUTOR_TOKEN}-nginx" + self.job_network_name = f"ch-{settings.EXECUTOR_TOKEN}-network" self.process: asyncio.subprocess.Process | None = None self.cmd: list[str] = [] @@ -489,12 +489,20 @@ def __init__(self, initial_job_request: V0InitialJobRequest | V1InitialJobReques save_public_key(self.initial_job_request.public_key, self.nginx_dir_path) self.is_streaming_job = True + async def cleanup_potential_old_jobs(self): + await (await asyncio.create_subprocess_shell( + "docker kill $(docker ps -q --filter 'name=ch-.*-job')")).communicate() + await (await asyncio.create_subprocess_shell( + "docker kill $(docker ps -q --filter 'name=ch-.*-nginx')")).communicate() + async def prepare(self): self.volume_mount_dir.mkdir(exist_ok=True) 
self.output_volume_mount_dir.mkdir(exist_ok=True) logger.info("preparing in progress") + await self.cleanup_potential_old_jobs() + if self.initial_job_request.base_docker_image_name is not None: logger.info("docker pull %s", self.initial_job_request.base_docker_image_name) process = await asyncio.create_subprocess_exec( From 2781fba4cb4879dde968110981dcff87416e1df6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Nowacki?= Date: Thu, 30 Jan 2025 13:14:10 +0100 Subject: [PATCH 2/5] executor cleanup test --- .../tests/integration/test_main_loop.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py b/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py index 60cdb4cf0..341ef4951 100644 --- a/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py +++ b/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py @@ -4,6 +4,7 @@ import logging import random import string +import subprocess import uuid import zipfile from functools import partial @@ -70,6 +71,23 @@ def __init__(self, messages, *args, **kwargs): def test_main_loop(): + job_container_name = f'ch-{uuid.uuid4()}-job' + nginx_container_name = f'ch-{uuid.uuid4()}-nginx' + for container_name in [job_container_name, nginx_container_name]: + subprocess.check_output([ + 'docker', + 'run', + '-d', + '--name', + container_name, + 'busybox', + 'sleep', + '1000', + ]) + for container_name in [job_container_name, nginx_container_name]: + output = subprocess.check_output(['docker', 'ps', '--filter', f'name={container_name}']) + assert container_name.encode() in output + command = CommandTested( iter( [ @@ -117,6 +135,10 @@ def test_main_loop(): }, ] + for container_name in [job_container_name, nginx_container_name]: + output = subprocess.check_output(['docker', 'ps', '--filter', f'name={container_name}']) + assert 
container_name.encode() not in output + def test_main_loop_streaming_job(): _, public_key, _ = generate_certificate_at() From 35a67832fb08a39da8137fc48452f0175ceb7c20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Nowacki?= Date: Thu, 30 Jan 2025 14:12:02 +0100 Subject: [PATCH 3/5] lint fix and change executor test parallel processes to 1 --- .../tests/integration/test_main_loop.py | 20 +++++++++---------- executor/noxfile.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py b/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py index 341ef4951..97de16fce 100644 --- a/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py +++ b/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py @@ -71,18 +71,18 @@ def __init__(self, messages, *args, **kwargs): def test_main_loop(): - job_container_name = f'ch-{uuid.uuid4()}-job' - nginx_container_name = f'ch-{uuid.uuid4()}-nginx' + job_container_name = f"ch-{uuid.uuid4()}-job" + nginx_container_name = f"ch-{uuid.uuid4()}-nginx" for container_name in [job_container_name, nginx_container_name]: subprocess.check_output([ - 'docker', - 'run', - '-d', - '--name', + "docker", + "run", + "-d", + "--name", container_name, - 'busybox', - 'sleep', - '1000', + "busybox", + "sleep", + "1000", ]) for container_name in [job_container_name, nginx_container_name]: output = subprocess.check_output(['docker', 'ps', '--filter', f'name={container_name}']) @@ -136,7 +136,7 @@ def test_main_loop(): ] for container_name in [job_container_name, nginx_container_name]: - output = subprocess.check_output(['docker', 'ps', '--filter', f'name={container_name}']) + output = subprocess.check_output(["docker", "ps", "--filter", f"name={container_name}"]) assert container_name.encode() not in output diff --git a/executor/noxfile.py b/executor/noxfile.py index 
f9d6ce75f..1482e5d0c 100644 --- a/executor/noxfile.py +++ b/executor/noxfile.py @@ -159,7 +159,7 @@ def test(session): "-x", "-vv", "-n", - "auto", + "1", "--junitxml", "test-report.xml", "compute_horde_executor", From c48c784be475511cd52f3afd9a5815f7c4953a08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Nowacki?= Date: Thu, 30 Jan 2025 14:15:30 +0100 Subject: [PATCH 4/5] even more linting fixes --- .../executor/tests/integration/test_main_loop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py b/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py index 97de16fce..64ee28c7e 100644 --- a/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py +++ b/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py @@ -85,7 +85,7 @@ def test_main_loop(): "1000", ]) for container_name in [job_container_name, nginx_container_name]: - output = subprocess.check_output(['docker', 'ps', '--filter', f'name={container_name}']) + output = subprocess.check_output(["docker", "ps", "--filter", f"name={container_name}"]) assert container_name.encode() in output command = CommandTested( From 19dcf83e8cc44199b36f377d1836e47fd2028b44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Nowacki?= Date: Thu, 30 Jan 2025 14:41:24 +0100 Subject: [PATCH 5/5] even more linting fixes --- .../management/commands/run_executor.py | 14 ++++++++---- .../tests/integration/test_main_loop.py | 22 ++++++++++--------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py b/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py index b71160076..bf00c3fb6 100644 --- a/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py +++ 
b/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py @@ -490,10 +490,16 @@ def __init__(self, initial_job_request: V0InitialJobRequest | V1InitialJobReques self.is_streaming_job = True async def cleanup_potential_old_jobs(self): - await (await asyncio.create_subprocess_shell( - "docker kill $(docker ps -q --filter 'name=ch-.*-job')")).communicate() - await (await asyncio.create_subprocess_shell( - "docker kill $(docker ps -q --filter 'name=ch-.*-nginx')")).communicate() + await ( + await asyncio.create_subprocess_shell( + "docker kill $(docker ps -q --filter 'name=ch-.*-job')" + ) + ).communicate() + await ( + await asyncio.create_subprocess_shell( + "docker kill $(docker ps -q --filter 'name=ch-.*-nginx')" + ) + ).communicate() async def prepare(self): self.volume_mount_dir.mkdir(exist_ok=True) diff --git a/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py b/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py index 64ee28c7e..0c47cdf8e 100644 --- a/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py +++ b/executor/app/src/compute_horde_executor/executor/tests/integration/test_main_loop.py @@ -74,16 +74,18 @@ def test_main_loop(): job_container_name = f"ch-{uuid.uuid4()}-job" nginx_container_name = f"ch-{uuid.uuid4()}-nginx" for container_name in [job_container_name, nginx_container_name]: - subprocess.check_output([ - "docker", - "run", - "-d", - "--name", - container_name, - "busybox", - "sleep", - "1000", - ]) + subprocess.check_output( + [ + "docker", + "run", + "-d", + "--name", + container_name, + "busybox", + "sleep", + "1000", + ] + ) for container_name in [job_container_name, nginx_container_name]: output = subprocess.check_output(["docker", "ps", "--filter", f"name={container_name}"]) assert container_name.encode() in output