add 4 spark workers
eslamdyab21 committed Dec 26, 2024
1 parent 0bde1a3 commit f34a111
Showing 2 changed files with 143 additions and 13 deletions.
144 changes: 131 additions & 13 deletions docker-compose.yml
@@ -1,30 +1,139 @@
 services:
-  spark-iceberg:
+  spark-master:
     image: tabulario/spark-iceberg
-    container_name: spark-iceberg
+    container_name: spark-master
     build: spark/
-    networks:
-      iceberg_net:
-    depends_on:
-      - rest
-      - minio
-    volumes:
-      - ./lakehouse:/home/iceberg/lakehouse
-      - ./notebooks:/home/iceberg/notebooks/notebooks
+
     environment:
       - AWS_ACCESS_KEY_ID=admin
       - AWS_SECRET_ACCESS_KEY=password
       - AWS_REGION=us-east-1
+      - SPARK_MODE=master
+
     ports:
       - 8888:8888
       - 8080:8080
       - 10000:10000
       - 10001:10001
+    volumes:
+      - ./lakehouse:/home/iceberg/lakehouse
+      - ./notebooks:/home/iceberg/notebooks/notebooks
+
+    networks:
+      - iceberg_net
+
+    depends_on:
+      - rest
+      - minio
+
+
+  spark-worker-1:
+    image: tabulario/spark-iceberg
+    container_name: spark-worker-1
+    build: spark/
+
+    volumes:
+      - ./spark_workers.sh:/opt/spark/spark_workers.sh
+
+    networks:
+      - iceberg_net
+
+    environment:
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+      - SPARK_MODE=worker
+      - SPARK_MASTER_URL=spark://spark-master:7077
+      - SPARK_WORKER_CORES=2
+      - SPARK_WORKER_MEMORY=1G
+
+    depends_on:
+      - spark-master
+
+    # command: ["/bin/bash", "/opt/spark/spark_workers.sh"]
+    # command: ["start-worker.sh", SPARK_MASTER_URL, "--cores", "2", "--memory", "1G"]
+    # command: ["/bin/bash", "-c", "start-worker.sh spark://spark-master:7077 --cores 2 --memory 1G"]
+    # command: ["/bin/bash", "spark_workers.sh"]
+
+
+  spark-worker-2:
+    image: tabulario/spark-iceberg
+    container_name: spark-worker-2
+    build: spark/
+
+    volumes:
+      - ./spark_workers.sh:/opt/spark/spark_workers.sh
+
+    networks:
+      - iceberg_net
+
+    environment:
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+      - SPARK_MODE=worker
+      - SPARK_MASTER_URL=spark://spark-master:7077
+      - SPARK_WORKER_CORES=2
+      - SPARK_WORKER_MEMORY=1G
+
+    depends_on:
+      - spark-master
+
+  spark-worker-3:
+    image: tabulario/spark-iceberg
+    container_name: spark-worker-3
+    build: spark/
+
+    volumes:
+      - ./spark_workers.sh:/opt/spark/spark_workers.sh
+
+    networks:
+      - iceberg_net
+
+    environment:
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+      - SPARK_MODE=worker
+      - SPARK_MASTER_URL=spark://spark-master:7077
+      - SPARK_WORKER_CORES=2
+      - SPARK_WORKER_MEMORY=1G
+
+    depends_on:
+      - spark-master
+
+
+  spark-worker-4:
+    image: tabulario/spark-iceberg
+    container_name: spark-worker-4
+    build: spark/
+
+    volumes:
+      - ./spark_workers.sh:/opt/spark/spark_workers.sh
+
+    networks:
+      - iceberg_net
+
+    environment:
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+      - SPARK_MODE=worker
+      - SPARK_MASTER_URL=spark://spark-master:7077
+      - SPARK_WORKER_CORES=2
+      - SPARK_WORKER_MEMORY=1G
+
+    depends_on:
+      - spark-master
+
+
+
+
   rest:
     image: apache/iceberg-rest-fixture
     container_name: iceberg-rest
     networks:
-      iceberg_net:
+      - iceberg_net
     ports:
       - 8181:8181
     environment:
@@ -34,6 +143,8 @@ services:
       - CATALOG_WAREHOUSE=s3://warehouse/
       - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
       - CATALOG_S3_ENDPOINT=http://minio:9000
+
+
   minio:
     image: minio/minio
     container_name: minio
@@ -49,13 +160,16 @@ services:
       - 9001:9001
       - 9000:9000
     command: ["server", "/data", "--console-address", ":9001"]
+
+
+
   mc:
     depends_on:
       - minio
     image: minio/mc
     container_name: mc
     networks:
-      iceberg_net:
+      - iceberg_net
     environment:
       - AWS_ACCESS_KEY_ID=admin
       - AWS_SECRET_ACCESS_KEY=password
@@ -68,5 +182,9 @@ services:
       /usr/bin/mc policy set public minio/warehouse;
       tail -f /dev/null
       "
 networks:
-  iceberg_net:
+  iceberg_net:
+    name: iceberg_net
12 changes: 12 additions & 0 deletions spark_workers.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Wait for the Spark Master to be ready
+until (curl -s spark-master:8080 > /dev/null) && (curl -s 127.0.0.1:8080 > /dev/null); do
+    echo "Waiting for Spark Master to be ready..."
+    sleep 5
+done
+
+kill -9 $(ps aux | grep webui-port| awk '{print $2}')
+
+# Start the worker
+exec start-worker.sh $SPARK_MASTER_URL --cores $SPARK_WORKER_CORES --memory $SPARK_WORKER_MEMORY
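
Note that this commit mounts spark_workers.sh into each worker container at /opt/spark/spark_workers.sh, but every command: override that would run it is still commented out, so the worker containers continue to start with the image's default entrypoint. A minimal sketch of how a worker service could invoke the script, mirroring the first commented-out line above (an assumption for illustration, not something this commit enables):

  spark-worker-1:
    # image, build, volumes, networks, environment and depends_on as above
    command: ["/bin/bash", "/opt/spark/spark_workers.sh"]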
