Skip to content

Commit

Permalink
feat(datahub-ingestion): refactor slim image to add vs remove pyspark…
Browse files Browse the repository at this point in the history
… deps
  • Loading branch information
david-leifker committed Jul 25, 2023
1 parent 27392f9 commit 6996ffe
Show file tree
Hide file tree
Showing 15 changed files with 37 additions and 16 deletions.
1 change: 1 addition & 0 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@ jobs:
env:
DATAHUB_TELEMETRY_ENABLED: false
DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }}
ACTIONS_BASE: "-slim"
run: |
./smoke-test/run-quickstart.sh
- name: sleep 60s
Expand Down
20 changes: 20 additions & 0 deletions docker/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,26 @@ task quickstart(type: Exec, dependsOn: ':metadata-ingestion:install') {
commandLine 'bash', '-c', cmd.join(" ")
}

// Launches `datahub docker quickstart` from the metadata-ingestion virtualenv
// with ACTIONS_BASE=-slim exported to the quickstart process.
task quickstartSlim(type: Exec, dependsOn: ':metadata-ingestion:install') {
    // Depend on the dockerTag task of every module listed in quickstart_modules.
    quickstart_modules.each { modulePath ->
        dependsOn(modulePath + ':dockerTag')
    }
    shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke'

    // Environment handed to the spawned bash process.
    environment([
        'DATAHUB_TELEMETRY_ENABLED': 'false',
        'DOCKER_COMPOSE_BASE'      : "file://${rootProject.projectDir}".toString(),
        'ACTIONS_BASE'             : '-slim'
    ])

    // Activate the ingestion venv first so the `datahub` CLI is on PATH.
    def activateVenv = 'source ../metadata-ingestion/venv/bin/activate && '
    def quickstartArgs = [
        'datahub docker quickstart',
        '--no-pull-images',
        '--standalone_consumers',
        '--version', "v${version}",
        '--dump-logs-on-failure'
    ]
    def shellLine = ([activateVenv] + quickstartArgs).join(' ')

    commandLine 'bash', '-c', shellLine
}

task quickstartNuke(type: Exec, dependsOn: ":metadata-ingestion:install") {
shouldRunAfter(':metadata-ingestion:clean')

Expand Down
6 changes: 2 additions & 4 deletions docker/datahub-ingestion-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ ENV CONFLUENT_KAFKA_VERSION=1.6.1

ENV DEBIAN_FRONTEND noninteractive

RUN apt-get update && apt-get install -y \
&& apt-get install -y -qq \
# gcc \
RUN apt-get update && apt-get install -y -qq \
make \
python3-ldap \
libldap2-dev \
Expand All @@ -31,7 +29,7 @@ RUN apt-get update && apt-get install -y \
zip \
unzip \
ldap-utils \
openjdk-11-jre-headless \
default-jre-headless \
&& python -m pip install --upgrade pip wheel setuptools==57.5.0 \
&& curl -Lk -o /root/librdkafka-${LIBRDKAFKA_VERSION}.tar.gz https://github.com/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz \
&& tar -xzf /root/librdkafka-${LIBRDKAFKA_VERSION}.tar.gz -C /root \
Expand Down
2 changes: 0 additions & 2 deletions docker/datahub-ingestion-base/base-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -253,15 +253,13 @@ pycryptodome==3.18.0
pycryptodomex==3.18.0
pydantic==1.10.8
pydash==7.0.3
pydeequ==1.0.1
pydruid==0.6.5
Pygments==2.15.1
pymongo==4.3.3
PyMySQL==1.0.3
pyOpenSSL==22.0.0
pyparsing==3.0.9
pyrsistent==0.19.3
pyspark==3.0.3
pyspnego==0.9.0
python-daemon==3.0.1
python-dateutil==2.8.2
Expand Down
3 changes: 0 additions & 3 deletions docker/datahub-ingestion-slim/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,3 @@ ARG DOCKER_VERSION=latest

FROM acryldata/datahub-ingestion:$DOCKER_VERSION as base

USER 0
RUN pip uninstall -y pyspark
USER datahub
4 changes: 4 additions & 0 deletions docker/datahub-ingestion/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ RUN (for attempt in 1 2 3 4 5; do ./gradlew --version && break ; echo "Failed to
./gradlew :metadata-events:mxe-schemas:build

FROM base as prod-codegen

COPY ./docker/datahub-ingestion/requirements.txt requirements.txt
RUN pip install -r requirements.txt

COPY --from=prod-build /datahub-src /datahub-src
RUN cd /datahub-src/metadata-ingestion && \
pip install -e ".[base]" && \
Expand Down
2 changes: 2 additions & 0 deletions docker/datahub-ingestion/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pyspark==3.0.3
pydeequ==1.0.1
2 changes: 1 addition & 1 deletion docker/docker-compose-with-cassandra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ services:
datahub-actions:
container_name: datahub-actions
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
# ACTIONS_BASE is an optional image-name suffix (e.g. "-slim"); ":-" expands to empty when unset.
image: acryldata/datahub-actions${ACTIONS_BASE:-}:${ACTIONS_VERSION:-head}
env_file: datahub-actions/env/docker.env
depends_on:
datahub-gms:
Expand Down
2 changes: 1 addition & 1 deletion docker/docker-compose-without-neo4j.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ services:
datahub-actions:
container_name: datahub-actions
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
# ACTIONS_BASE is an optional image-name suffix (e.g. "-slim"); ":-" expands to empty when unset.
image: acryldata/datahub-actions${ACTIONS_BASE:-}:${ACTIONS_VERSION:-head}
env_file: datahub-actions/env/docker.env
depends_on:
datahub-gms:
Expand Down
2 changes: 1 addition & 1 deletion docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ services:
datahub-actions:
container_name: datahub-actions
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
# ACTIONS_BASE is an optional image-name suffix (e.g. "-slim"); ":-" expands to empty when unset.
image: acryldata/datahub-actions${ACTIONS_BASE:-}:${ACTIONS_VERSION:-head}
env_file: datahub-actions/env/docker.env
depends_on:
datahub-gms:
Expand Down
2 changes: 1 addition & 1 deletion docker/quickstart/docker-compose-m1.quickstart.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ services:
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
# ACTIONS_BASE is an optional image-name suffix (e.g. "-slim"); ":-" expands to empty when unset.
image: acryldata/datahub-actions${ACTIONS_BASE:-}:${ACTIONS_VERSION:-head}
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ services:
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
# ACTIONS_BASE is an optional image-name suffix (e.g. "-slim"); ":-" expands to empty when unset.
image: acryldata/datahub-actions${ACTIONS_BASE:-}:${ACTIONS_VERSION:-head}
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ services:
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
# ACTIONS_BASE is an optional image-name suffix (e.g. "-slim"); ":-" expands to empty when unset.
image: acryldata/datahub-actions${ACTIONS_BASE:-}:${ACTIONS_VERSION:-head}
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
Expand Down
2 changes: 1 addition & 1 deletion docker/quickstart/docker-compose.quickstart.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ services:
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
hostname: actions
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
# ACTIONS_BASE is an optional image-name suffix (e.g. "-slim"); ":-" expands to empty when unset.
image: acryldata/datahub-actions${ACTIONS_BASE:-}:${ACTIONS_VERSION:-head}
datahub-frontend-react:
container_name: datahub-frontend-react
depends_on:
Expand Down
1 change: 1 addition & 0 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ The environment variables listed below take precedence over the DataHub CLI conf
- `DATAHUB_DEBUG` (default `false`) - Set to `true` to enable debug logging for CLI. Can also be achieved through `--debug` option of the CLI.
- `DATAHUB_VERSION` (default `head`) - Set to a specific version to run quickstart with the particular version of docker images.
- `ACTIONS_VERSION` (default `head`) - Set to a specific version to run quickstart with that image tag of `datahub-actions` container.
- `ACTIONS_BASE` (default: empty) - Set to `-slim` to run a slimmer `datahub-actions` container without pyspark/deequ features.

```shell
DATAHUB_SKIP_CONFIG=false
Expand Down

0 comments on commit 6996ffe

Please sign in to comment.