Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup #389

Merged
merged 5 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -473,9 +473,9 @@ def __init__(self, initial_job_request: V0InitialJobRequest | V1InitialJobReques
self.specs_volume_mount_dir = self.temp_dir / "specs"
self.download_manager = DownloadManager()

self.job_container_name = f"{settings.EXECUTOR_TOKEN}-job"
self.nginx_container_name = f"{settings.EXECUTOR_TOKEN}-nginx"
self.job_network_name = f"{settings.EXECUTOR_TOKEN}-network"
self.job_container_name = f"ch-{settings.EXECUTOR_TOKEN}-job"
self.nginx_container_name = f"ch-{settings.EXECUTOR_TOKEN}-nginx"
self.job_network_name = f"ch-{settings.EXECUTOR_TOKEN}-network"
self.process: asyncio.subprocess.Process | None = None
self.cmd: list[str] = []

Expand All @@ -489,12 +489,26 @@ def __init__(self, initial_job_request: V0InitialJobRequest | V1InitialJobReques
save_public_key(self.initial_job_request.public_key, self.nginx_dir_path)
self.is_streaming_job = True

async def cleanup_potential_old_jobs(self):
    """Best-effort kill of leftover containers from previous executor runs.

    Containers are named ``ch-<token>-job`` / ``ch-<token>-nginx`` (see the
    names assigned in ``__init__``), so a docker name filter on the ``ch-``
    prefix matches any stale ones. A non-zero exit from ``docker kill``
    (e.g. when no containers match and the command substitution is empty)
    is deliberately ignored: this is cleanup, not a hard requirement.
    """
    # One pass per container kind; previously this was two copy-pasted
    # subprocess invocations differing only in the name pattern.
    for name_pattern in ("ch-.*-job", "ch-.*-nginx"):
        process = await asyncio.create_subprocess_shell(
            f"docker kill $(docker ps -q --filter 'name={name_pattern}')"
        )
        await process.communicate()

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

may as well clear the network the same way - docker network rm -f with docker network ls -q --filter name=...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah but in my tests the network wasn't causing an issue

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah but in my tests the network wasn't causing an issue

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah but in my tests the network wasn't causing an issue

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah but in my tests the network wasn't causing an issue

async def prepare(self):
self.volume_mount_dir.mkdir(exist_ok=True)
self.output_volume_mount_dir.mkdir(exist_ok=True)

logger.info("preparing in progress")

await self.cleanup_potential_old_jobs()

if self.initial_job_request.base_docker_image_name is not None:
logger.info("docker pull %s", self.initial_job_request.base_docker_image_name)
process = await asyncio.create_subprocess_exec(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import random
import string
import subprocess
import uuid
import zipfile
from functools import partial
Expand Down Expand Up @@ -70,6 +71,25 @@ def __init__(self, messages, *args, **kwargs):


def test_main_loop():
job_container_name = f"ch-{uuid.uuid4()}-job"
nginx_container_name = f"ch-{uuid.uuid4()}-nginx"
for container_name in [job_container_name, nginx_container_name]:
subprocess.check_output(
[
"docker",
"run",
"-d",
"--name",
container_name,
"busybox",
"sleep",
"1000",
]
)
for container_name in [job_container_name, nginx_container_name]:
output = subprocess.check_output(["docker", "ps", "--filter", f"name={container_name}"])
assert container_name.encode() in output

command = CommandTested(
iter(
[
Expand Down Expand Up @@ -117,6 +137,10 @@ def test_main_loop():
},
]

for container_name in [job_container_name, nginx_container_name]:
output = subprocess.check_output(["docker", "ps", "--filter", f"name={container_name}"])
assert container_name.encode() not in output


def test_main_loop_streaming_job():
_, public_key, _ = generate_certificate_at()
Expand Down
2 changes: 1 addition & 1 deletion executor/noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def test(session):
"-x",
"-vv",
"-n",
"auto",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, so that tests dont kill each others' containers, makes sense

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They do get a bit slower but I guess it's not a huge deal - 38s ⏱️ +14s

"1",
"--junitxml",
"test-report.xml",
"compute_horde_executor",
Expand Down