Skip to content

Commit

Permalink
Merge pull request #524 from omnivector-solutions/revise-composed-set…
Browse files Browse the repository at this point in the history
…up-for-jobbergate-agent

Revised setup for jobbergate-agent in jobbergate-composed
  • Loading branch information
dusktreader authored Apr 8, 2024
2 parents ca8550a + 073ded9 commit 9354395
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 48 deletions.
20 changes: 0 additions & 20 deletions jobbergate-agent/Dockerfile.dev

This file was deleted.

23 changes: 0 additions & 23 deletions jobbergate-agent/etc/entrypoint.sh

This file was deleted.

16 changes: 15 additions & 1 deletion jobbergate-composed/Dockerfile-slurm
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:jammy-20211122
FROM ubuntu:jammy-20211122 as slurm-base


# Install GOSU
Expand Down Expand Up @@ -55,3 +55,17 @@ RUN useradd -ms /bin/bash local-user
COPY etc/slurm-entrypoint.sh /usr/local/bin/slurm-entrypoint.sh

ENTRYPOINT ["/usr/local/bin/slurm-entrypoint.sh"]



# Stage: jobbergate-agent
# Extends the slurm-base stage with the tooling needed to run jobbergate-agent
# from source: a Python 3 toolchain, build headers, and a pinned Poetry.
FROM slurm-base AS jobbergate-agent

# Run the RUN steps below under bash with pipefail so that a failed download
# in the `curl | python` pipeline aborts the build instead of being masked by
# the exit status of the last command in the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# apt-get (rather than apt) is the CLI intended for non-interactive use. The
# package index is removed in the same layer so it does not persist in the
# image. /usr/bin/python is linked because the Poetry installer below is
# invoked as `python`.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        curl \
        gcc \
        libpq-dev \
        python3-dev \
        python3-pip && \
    rm -rf /var/lib/apt/lists/* && \
    ln -s /usr/bin/python3 /usr/bin/python

# Install Poetry 1.5.1 into /opt/poetry and expose it on PATH. `curl -f` turns
# an HTTP error into a non-zero exit rather than piping an error page to
# python. Virtualenv creation is disabled so Poetry installs into the image's
# own interpreter.
RUN curl -fsSL https://install.python-poetry.org | \
    POETRY_HOME=/opt/poetry POETRY_VERSION=1.5.1 python && \
    ln -s /opt/poetry/bin/poetry /usr/local/bin/poetry && \
    poetry config virtualenvs.create false

WORKDIR /app
38 changes: 34 additions & 4 deletions jobbergate-composed/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -225,15 +225,38 @@ services:

slurmctld:
build:
context: ../jobbergate-agent/
dockerfile: Dockerfile.dev
context: .
dockerfile: Dockerfile-slurm
target: slurm-base
args:
- JWT_SECRET=${JWT_SECRET:-supersecret}
image: slurm-docker-cluster
networks:
- jobbergate-net
command: ["slurmctld"]
container_name: slurmctld
hostname: slurmctld
volumes:
- etc_munge:/etc/munge
- var_log_slurm:/var/log/slurm
- ./slurm-fake-nfs:/nfs
- ./slurm-work-dir:/slurm-work-dir
expose:
- "6817"

jobbergate-agent:
privileged: true
build:
context: .
dockerfile: Dockerfile-slurm
target: jobbergate-agent
args:
- JWT_SECRET=${JWT_SECRET:-supersecret}
networks:
- jobbergate-net
command: ["jobbergate-agent"]
container_name: jobbergate-agent
hostname: jobbergate-agent
environment:
- JOBBERGATE_AGENT_X_SLURM_USER_NAME=local-user
- JOBBERGATE_AGENT_DEFAULT_SLURM_WORK_DIR=/slurm-work-dir
Expand All @@ -252,20 +275,25 @@ services:
- ./slurm-fake-nfs:/nfs
- ./slurm-work-dir:/slurm-work-dir
- ../jobbergate-agent/jobbergate_agent/:/app/jobbergate_agent
- ../jobbergate-agent/pyproject.toml:/app/pyproject.toml
- ../jobbergate-agent/poetry.lock:/app/poetry.lock
- ../jobbergate-agent/README.md:/app/README.md
- ../jobbergate-agent/LICENSE:/app/LICENSE
- ../jobbergate-core/:/jobbergate-core
- jobbergate-agent-cache:/cache/
expose:
- "6817"
- "6818"
depends_on:
jobbergate-api:
condition: service_healthy
slurmdbd:
slurmctld:
condition: service_started

slurmdbd:
build:
context: .
dockerfile: Dockerfile-slurm
target: slurm-base
image: slurm-docker-cluster
networks:
- jobbergate-net
Expand All @@ -287,6 +315,7 @@ services:
build:
context: .
dockerfile: Dockerfile-slurm
target: slurm-base
image: slurm-docker-cluster
networks:
- jobbergate-net
Expand All @@ -308,6 +337,7 @@ services:
build:
context: .
dockerfile: Dockerfile-slurm
target: slurm-base
image: slurm-docker-cluster
networks:
- jobbergate-net
Expand Down
20 changes: 20 additions & 0 deletions jobbergate-composed/etc/slurm-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,24 @@ then
exec /usr/sbin/slurmd -Dvvv
fi

# jobbergate-agent mode: wait for the Slurm controller, start a local slurmd
# in the background, then install and run the agent from /app.
if [ "$1" = "jobbergate-agent" ]
then
    echo "---> Waiting for slurmctld to become active before starting jobbergate-agent..."

    # Probe the controller port with bash's /dev/tcp redirection; the
    # redirection only succeeds once a TCP connection can be opened.
    until 2>/dev/null >/dev/tcp/slurmctld/6817
    do
        echo "-- slurmctld is not available. Sleeping ..."
        sleep 2
    done
    echo "-- slurmctld is now active ..."

    echo "---> Starting the Slurm Node Daemon (slurmd) ..."
    # Backgrounded so the agent can be started in the foreground below.
    /usr/sbin/slurmd -Dvvv &

    echo "---> Starting Jobbergate-agent ..."
    # Abort rather than run poetry from the wrong directory.
    cd /app || exit 1
    poetry install
    # exec replaces this shell with the agent so it receives the container's
    # signals directly, and so control can never fall through to the trailing
    # `exec "$@"`, which would try to execute the literal word
    # "jobbergate-agent" once the agent exits.
    exec poetry run jg-run
fi

exec "$@"
2 changes: 2 additions & 0 deletions jobbergate-composed/etc/slurm.conf
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ AccountingStorageHost=slurmdbd
#
# COMPUTE NODES
NodeName=c[1-2] RealMemory=1000 State=UNKNOWN CPUs=4
NodeName=jobbergate-agent RealMemory=1000 State=UNKNOWN CPUs=4
#
# PARTITIONS
PartitionName=compute Default=yes Nodes=c[1-2] Priority=50 Shared=NO MaxNodes=2 MaxTime=5-00:00:00 DefaultTime=5-00:00:00 State=UP
PartitionName=login Default=no Nodes=jobbergate-agent Priority=50 Shared=NO MaxNodes=1 MaxTime=5-00:00:00 DefaultTime=5-00:00:00 State=INACTIVE

0 comments on commit 9354395

Please sign in to comment.