Skip to content

Commit

Permalink
chore: fix docker build
Browse files Browse the repository at this point in the history
  • Loading branch information
ClemDoum committed Dec 4, 2024
1 parent d203179 commit d0fa699
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 46 deletions.
2 changes: 2 additions & 0 deletions .data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*
!.gitignore
38 changes: 32 additions & 6 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,23 +1,47 @@
version: '3.7'

services:
dummy-worker:
ml-worker:
build:
context: .
target: worker
container_name: datashare-ml-worker
deploy:
mode: replicated
replicas: 1
depends_on:
datashare_web:
condition: service_started
environment:
DS_DOCKER_SPACY_LOG_LEVEL: DEBUG
DS_DOCKER_SPACY_MAX_PROCESSES: 1
DS_DOCKER_SPACY_MAX_LANGUAGES_IN_MEMORY: 1
DS_DOCKER_ML_LOG_LEVEL: DEBUG
DS_DOCKER_ML_MAX_PROCESSES: 1
DS_DOCKER_ML_MAX_LANGUAGES_IN_MEMORY: 1
DS_DOCKER_ES_ADDRESS: http://elasticsearch:9200
ICIJ_WORKER_TYPE: amqp
ICIJ_WORKER_RABBITMQ_HOST: rabbitmq
ICIJ_WORKER_RABBITMQ_PORT: 5672

# Adding rabbitmq to distribute Datashare tasks
rabbitmq:
image: rabbitmq:3.12.0-management
# Set a fixed hostname to prevent rabbitmq from creating a new conf each time, see https://stackoverflow.com/a/53772874
hostname: rmq
container_name: datashare-rabbitmq
healthcheck:
test: rabbitmq-diagnostics -q status
interval: 5s
timeout: 2s
retries: 10
start_period: 5s
ports:
- "5672:5672"
- "15672:15672"
volumes:
- type: volume
source: rabbitmq-data
target: /var/lib/rabbitmq


# Unless commented, all lines below are copied from https://icij.gitbook.io/datashare/server-mode/install-with-docker
datashare_web:
image: icij/datashare:19.0.0
Expand All @@ -28,11 +52,13 @@ services:
- DS_DOCKER_MOUNTED_DATA_DIR=${PWD}/data
volumes:
- type: bind
source: ${PWD}/Datashare
source: ${PWD}/.data
target: /home/datashare/Datashare
depends_on:
elasticsearch:
condition: service_healthy
rabbitmq:
condition: service_healthy
# Compared to the default config, we need to switch batchQueueType to AMQP and taskRoutingStrategy to Group
command: >-
--mode SERVER
Expand Down Expand Up @@ -106,4 +132,4 @@ volumes:
elasticsearch-data:
postgresql-data:
redis-data:

rabbitmq-data:
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ dependencies = [
[tool.uv.sources]
torch = [
# TODO: add CUDA support
{ index = "pytorch-nightly", marker = "sys_platform == 'darwin'" },
{ index = "pytorch-cpu", marker = "sys_platform != 'darwin'" },
{ index = "pytorch-nightly" },
]

[[tool.uv.index]]
Expand Down
2 changes: 1 addition & 1 deletion scripts/worker_entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash
N_PROCESSING_WORKERS=
uv run python -m icij_worker workers start -g PYTHON -n "${N_PROCESSING_WORKERS:-1}" datashare_spacy_worker.app.app
uv run python -m icij_worker workers start -g PYTHON -n "${N_PROCESSING_WORKERS:-1}" ml_worker.app.app
80 changes: 43 additions & 37 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d0fa699

Please sign in to comment.