From c8f08f9dfa39547b21f26422a6e2a160524b2a44 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 17 Aug 2023 00:10:17 -0500 Subject: [PATCH] feat(datahub-ingestion): refactor datahub ingestion slim images (#8515) --- .../docker-custom-build-and-push/action.yml | 5 + .github/workflows/docker-ingestion-base.yml | 45 --- .github/workflows/docker-ingestion.yml | 118 ------- .github/workflows/docker-unified.yml | 321 +++++++++++++++++- build.gradle | 2 +- datahub-frontend/build.gradle | 4 +- datahub-upgrade/build.gradle | 4 +- docker/build.gradle | 27 +- docker/datahub-ingestion-base/Dockerfile | 49 ++- .../base-requirements.txt | 6 +- docker/datahub-ingestion-base/build.gradle | 12 +- docker/datahub-ingestion-base/entrypoint.sh | 14 + docker/datahub-ingestion-slim/Dockerfile | 9 - docker/datahub-ingestion-slim/build.gradle | 39 --- docker/datahub-ingestion/Dockerfile | 57 ++-- docker/datahub-ingestion/build.gradle | 30 +- docker/docker-compose-with-cassandra.yml | 3 + docker/docker-compose-without-neo4j.yml | 3 + docker/docker-compose.yml | 3 + docker/elasticsearch-setup/build.gradle | 6 +- docker/kafka-setup/build.gradle | 6 +- docker/mysql-setup/build.gradle | 6 +- docker/postgres-setup/build.gradle | 6 +- .../docker-compose-m1.quickstart.yml | 2 + ...er-compose-without-neo4j-m1.quickstart.yml | 2 + ...ocker-compose-without-neo4j.quickstart.yml | 2 + .../quickstart/docker-compose.quickstart.yml | 2 + gradle/docker/docker.gradle | 2 + .../src/datahub/cli/docker_cli.py | 1 + metadata-jobs/mae-consumer-job/build.gradle | 4 +- metadata-jobs/mce-consumer-job/build.gradle | 4 +- metadata-service/war/build.gradle | 4 +- .../e2e/mutations/managed_ingestion.js | 3 +- 33 files changed, 500 insertions(+), 301 deletions(-) delete mode 100644 .github/workflows/docker-ingestion.yml create mode 100644 docker/datahub-ingestion-base/entrypoint.sh delete mode 100644 docker/datahub-ingestion-slim/Dockerfile delete mode 100644 docker/datahub-ingestion-slim/build.gradle diff --git a/.github/actions/docker-custom-build-and-push/action.yml b/.github/actions/docker-custom-build-and-push/action.yml index 96d4d759dbb84..bd6bb842b1fb8 100644 --- a/.github/actions/docker-custom-build-and-push/action.yml +++ b/.github/actions/docker-custom-build-and-push/action.yml @@ -30,6 +30,9 @@ inputs: # e.g. latest,head,sha12345 description: "List of tags to use for the Docker image" required: true + target: + description: "Sets the target stage to build" + required: false outputs: image_tag: description: "Docker image tags" @@ -62,6 +65,7 @@ runs: platforms: linux/amd64 build-args: ${{ inputs.build-args }} tags: ${{ steps.docker_meta.outputs.tags }} + target: ${{ inputs.target }} load: true push: false cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }} @@ -94,6 +98,7 @@ runs: platforms: ${{ inputs.platforms }} build-args: ${{ inputs.build-args }} tags: ${{ steps.docker_meta.outputs.tags }} + target: ${{ inputs.target }} push: true cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }} cache-to: type=inline diff --git a/.github/workflows/docker-ingestion-base.yml b/.github/workflows/docker-ingestion-base.yml index 0d29f79aa5f6c..e69de29bb2d1d 100644 --- a/.github/workflows/docker-ingestion-base.yml +++ b/.github/workflows/docker-ingestion-base.yml @@ -1,45 +0,0 @@ -name: ingestion base -on: - release: - types: [published] - push: - branches: - - master - paths: - - ".github/workflows/docker-ingestion-base.yml" - - "docker/datahub-ingestion-base/**" - - "gradle*" - pull_request: - branches: - - master - paths: - - ".github/workflows/docker-ingestion-base.yml" - - "docker/datahub-ingestion-base/**" - - "gradle*" - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - build-base: - name: Build and Push Docker Image to Docker Hub - runs-on: ubuntu-latest - steps: - - name: Check out the repo - uses: actions/checkout@v3 - with: - fetch-depth: 800 - - name: Build and Push image - uses: ./.github/actions/docker-custom-build-and-push - with: - images: | - acryldata/datahub-ingestion-base - tags: latest - username: ${{ secrets.ACRYL_DOCKER_USERNAME }} - password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - publish: ${{ github.ref == 'refs/heads/master' }} - context: . - file: ./docker/datahub-ingestion-base/Dockerfile - platforms: linux/amd64,linux/arm64/v8 diff --git a/.github/workflows/docker-ingestion.yml b/.github/workflows/docker-ingestion.yml deleted file mode 100644 index f3768cfde5002..0000000000000 --- a/.github/workflows/docker-ingestion.yml +++ /dev/null @@ -1,118 +0,0 @@ -name: datahub-ingestion docker -on: - push: - branches: - - master - paths-ignore: - - "docs/**" - - "**.md" - pull_request: - branches: - - master - paths: - - "metadata-ingestion/**" - - "metadata-models/**" - - "docker/datahub-ingestion/**" - - "docker/datahub-ingestion-slim/**" - - ".github/workflows/docker-ingestion.yml" - release: - types: [published] - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - setup: - runs-on: ubuntu-latest - outputs: - tag: ${{ steps.tag.outputs.tag }} - publish: ${{ steps.publish.outputs.publish }} - python_release_version: ${{ steps.python_release_version.outputs.release_version }} - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Compute Tag - id: tag - run: | - source .github/scripts/docker_helpers.sh - echo "tag=$(get_tag)" >> $GITHUB_OUTPUT - - name: Compute Python Release Version - id: python_release_version - run: | - source .github/scripts/docker_helpers.sh - echo "release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT - - name: Check whether publishing enabled - id: publish - env: - ENABLE_PUBLISH: ${{ secrets.DOCKER_PASSWORD }} - run: | - echo "Enable publish: ${{ env.ENABLE_PUBLISH != '' }}" - echo "publish=${{ env.ENABLE_PUBLISH != '' }}" >> $GITHUB_OUTPUT - push_to_registries: - name: Build and Push Docker Image to Docker Hub - runs-on: ubuntu-latest - needs: setup - steps: - - name: Check out the repo - uses: actions/checkout@v3 - with: - fetch-depth: 800 - - name: Build and push - uses: ./.github/actions/docker-custom-build-and-push - with: - images: | - linkedin/datahub-ingestion - tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - publish: ${{ needs.setup.outputs.publish == 'true' }} - context: . - file: ./docker/datahub-ingestion/Dockerfile - platforms: linux/amd64,linux/arm64/v8 - build-args: | - RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} - - name: Build and Push image (slim) - uses: ./.github/actions/docker-custom-build-and-push - with: - images: | - acryldata/datahub-ingestion-slim - tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.ACRYL_DOCKER_USERNAME }} - password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - publish: ${{ needs.setup.outputs.publish == 'true' }} - context: . - file: ./docker/datahub-ingestion-slim/Dockerfile - platforms: linux/amd64,linux/arm64/v8 - ingestion-slim_scan: - permissions: - contents: read # for actions/checkout to fetch code - security-events: write # for github/codeql-action/upload-sarif to upload SARIF results - actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status - name: "[Monitoring] Scan datahub-ingestion-slim images for vulnerabilities" - if: ${{ github.ref == 'refs/heads/master' }} - runs-on: ubuntu-latest - needs: [push_to_registries] - steps: - - name: Checkout # adding checkout step just to make trivy upload happy - uses: actions/checkout@v3 - - name: Download image - uses: ishworkh/docker-image-artifact-download@v1 - with: - image: acryldata/datahub-ingestion-slim:latest - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@0.8.0 - env: - TRIVY_OFFLINE_SCAN: true - with: - image-ref: acryldata/datahub-ingestion-slim:latest - format: "template" - template: "@/contrib/sarif.tpl" - output: "trivy-results.sarif" - severity: "CRITICAL,HIGH" - ignore-unfixed: true - vuln-type: "os,library" - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 - with: - sarif_file: "trivy-results.sarif" diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 1eb2a393600d2..c268a66938945 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -31,13 +31,19 @@ env: DATAHUB_ELASTIC_SETUP_IMAGE: "linkedin/datahub-elasticsearch-setup" DATAHUB_MYSQL_SETUP_IMAGE: "acryldata/datahub-mysql-setup" DATAHUB_UPGRADE_IMAGE: "acryldata/datahub-upgrade" + DATAHUB_INGESTION_BASE_IMAGE: "acryldata/datahub-ingestion-base" + DATAHUB_INGESTION_IMAGE: "acryldata/datahub-ingestion" jobs: setup: runs-on: ubuntu-latest outputs: tag: ${{ steps.tag.outputs.tag }} + slim_tag: ${{ steps.tag.outputs.slim_tag }} + full_tag: ${{ steps.tag.outputs.full_tag }} unique_tag: ${{ steps.tag.outputs.unique_tag }} + unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }} + unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }} publish: ${{ steps.publish.outputs.publish }} steps: - name: Checkout @@ -47,11 +53,15 @@ jobs: run: | source .github/scripts/docker_helpers.sh echo "tag=$(get_tag)" >> $GITHUB_OUTPUT + echo "slim_tag=$(get_tag)-slim" >> $GITHUB_OUTPUT + echo "full_tag=$(get_tag)-full" >> $GITHUB_OUTPUT echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT + echo "unique_slim_tag=$(get_unique_tag)-slim" >> $GITHUB_OUTPUT + echo "unique_full_tag=$(get_unique_tag)-full" >> $GITHUB_OUTPUT - name: Check whether publishing enabled id: publish env: - ENABLE_PUBLISH: ${{ secrets.DOCKER_PASSWORD }} + ENABLE_PUBLISH: ${{ secrets.DOCKER_PASSWORD != '' && secrets.ACRYL_DOCKER_PASSWORD != '' }} run: | echo "Enable publish: ${{ env.ENABLE_PUBLISH != '' }}" echo "publish=${{ env.ENABLE_PUBLISH != '' }}" >> $GITHUB_OUTPUT @@ -414,6 +424,291 @@ jobs: file: ./docker/elasticsearch-setup/Dockerfile platforms: linux/amd64,linux/arm64/v8 + datahub_ingestion_base_build: + name: Build and Push DataHub Ingestion (Base) Docker Image + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.tag.outputs.tag }} + needs: setup + steps: + - name: Check out the repo + uses: actions/checkout@v3 + with: + fetch-depth: 800 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + datahub-ingestion-base: + - 'docker/datahub-ingestion-base/**' + - name: Build and push Base Image + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' }} + uses: ./.github/actions/docker-custom-build-and-push + with: + target: base + images: | + ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} + tags: ${{ needs.setup.outputs.tag }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + build-args: | + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + publish: ${{ needs.setup.outputs.publish }} + context: . + file: ./docker/datahub-ingestion-base/Dockerfile + platforms: linux/amd64,linux/arm64/v8 + - name: Compute DataHub Ingestion (Base) Tag + id: tag + run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }}" >> $GITHUB_OUTPUT + datahub_ingestion_base_slim_build: + name: Build and Push DataHub Ingestion (Base-Slim) Docker Image + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.tag.outputs.tag }} + needs: [setup, datahub_ingestion_base_build] + steps: + - name: Check out the repo + uses: actions/checkout@v3 + with: + fetch-depth: 800 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + datahub-ingestion-base: + - 'docker/datahub-ingestion-base/**' + - name: Download Base Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + - name: Build and push Base-Slim Image + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' }} + uses: ./.github/actions/docker-custom-build-and-push + with: + target: slim-install + images: | + ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} + tags: ${{ needs.setup.outputs.slim_tag }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + build-args: | + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + APP_ENV=slim + BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + publish: ${{ needs.setup.outputs.publish }} + context: . + file: ./docker/datahub-ingestion-base/Dockerfile + platforms: linux/amd64,linux/arm64/v8 + - name: Compute DataHub Ingestion (Base-Slim) Tag + id: tag + run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.slim_tag || 'head' }}" >> $GITHUB_OUTPUT + datahub_ingestion_base_full_build: + name: Build and Push DataHub Ingestion (Base-Full) Docker Image + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.tag.outputs.tag }} + needs: [setup, datahub_ingestion_base_build] + steps: + - name: Check out the repo + uses: actions/checkout@v3 + with: + fetch-depth: 800 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + datahub-ingestion-base: + - 'docker/datahub-ingestion-base/**' + - name: Download Base Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + - name: Build and push Base-Full Image + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' }} + uses: ./.github/actions/docker-custom-build-and-push + with: + target: full-install + images: | + ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} + tags: ${{ needs.setup.outputs.full_tag }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + build-args: | + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + APP_ENV=full + BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + publish: ${{ needs.setup.outputs.publish }} + context: . + file: ./docker/datahub-ingestion-base/Dockerfile + platforms: linux/amd64,linux/arm64/v8 + - name: Compute DataHub Ingestion (Base-Full) Tag + id: tag + run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.full_tag || 'head' }}" >> $GITHUB_OUTPUT + + + datahub_ingestion_slim_build: + name: Build and Push DataHub Ingestion Docker Images + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.tag.outputs.tag }} + needs: [setup, datahub_ingestion_base_slim_build] + steps: + - name: Check out the repo + uses: actions/checkout@v3 + with: + fetch-depth: 800 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + datahub-ingestion-base: + - 'docker/datahub-ingestion-base/**' + datahub-ingestion: + - 'docker/datahub-ingestion/**' + - name: Build codegen + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + run: ./gradlew :metadata-ingestion:codegen + - name: Download Base Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.slim_tag || 'head' }} + - name: Build and push Slim Image + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + uses: ./.github/actions/docker-custom-build-and-push + with: + target: final + images: | + ${{ env.DATAHUB_INGESTION_IMAGE }} + build-args: | + BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.slim_tag || 'head' }} + APP_ENV=slim + tags: ${{ needs.setup.outputs.slim_tag }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + publish: ${{ needs.setup.outputs.publish }} + context: . + file: ./docker/datahub-ingestion/Dockerfile + platforms: linux/amd64,linux/arm64/v8 + - name: Compute Tag + id: tag + run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.slim_tag || 'head' }}" >> $GITHUB_OUTPUT + datahub_ingestion_slim_scan: + permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + name: "[Monitoring] Scan Datahub Ingestion Slim images for vulnerabilities" + runs-on: ubuntu-latest + needs: [setup, datahub_ingestion_slim_build] + steps: + - name: Checkout # adding checkout step just to make trivy upload happy + uses: actions/checkout@v3 + - name: Download image Slim Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_slim_build.outputs.slim_tag }} + - name: Run Trivy vulnerability scanner Slim Image + uses: aquasecurity/trivy-action@0.8.0 + env: + TRIVY_OFFLINE_SCAN: true + with: + image-ref: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_slim_build.outputs.slim_tag }} + format: "template" + template: "@/contrib/sarif.tpl" + output: "trivy-results.sarif" + severity: "CRITICAL,HIGH" + ignore-unfixed: true + vuln-type: "os,library" + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v2 + with: + sarif_file: "trivy-results.sarif" + + datahub_ingestion_full_build: + name: Build and Push DataHub Ingestion (Full) Docker Images + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.tag.outputs.tag }} + needs: [setup, datahub_ingestion_base_full_build] + steps: + - name: Check out the repo + uses: actions/checkout@v3 + with: + fetch-depth: 800 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + datahub-ingestion-base: + - 'docker/datahub-ingestion-base/**' + datahub-ingestion: + - 'docker/datahub-ingestion/**' + - name: Build codegen + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + run: ./gradlew :metadata-ingestion:codegen + - name: Download Base Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.full_tag || 'head' }} + - name: Build and push Full Image + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + uses: ./.github/actions/docker-custom-build-and-push + with: + target: final + images: | + ${{ env.DATAHUB_INGESTION_IMAGE }} + build-args: | + BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.full_tag || 'head' }} + tags: ${{ needs.setup.outputs.full_tag }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + publish: ${{ needs.setup.outputs.publish }} + context: . + file: ./docker/datahub-ingestion/Dockerfile + platforms: linux/amd64,linux/arm64/v8 + - name: Compute Tag (Full) + id: tag + run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.full_tag || 'head' }}" >> $GITHUB_OUTPUT + datahub_ingestion_full_scan: + permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + name: "[Monitoring] Scan Datahub Ingestion images for vulnerabilities" + runs-on: ubuntu-latest + needs: [setup, datahub_ingestion_full_build] + steps: + - name: Checkout # adding checkout step just to make trivy upload happy + uses: actions/checkout@v3 + - name: Download image Full Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_full_build.outputs.full_tag }} + - name: Run Trivy vulnerability scanner Full Image + uses: aquasecurity/trivy-action@0.8.0 + env: + TRIVY_OFFLINE_SCAN: true + with: + image-ref: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_full_build.outputs.full_tag }} + format: "template" + template: "@/contrib/sarif.tpl" + output: "trivy-results.sarif" + severity: "CRITICAL,HIGH" + ignore-unfixed: true + vuln-type: "os,library" + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v2 + with: + sarif_file: "trivy-results.sarif" + smoke_test: name: Run Smoke Tests runs-on: ubuntu-latest @@ -432,8 +727,11 @@ jobs: mae_consumer_build, mce_consumer_build, datahub_upgrade_build, + datahub_ingestion_slim_build, ] steps: + - name: Disk Check + run: df -h . && docker images - name: Check out the repo uses: actions/checkout@v3 - name: Set up JDK 11 @@ -450,6 +748,8 @@ jobs: - name: Build datahub cli run: | ./gradlew :metadata-ingestion:install + - name: Disk Check + run: df -h . && docker images - name: Download GMS image uses: ishworkh/docker-image-artifact-download@v1 if: ${{ needs.setup.outputs.publish != 'true' }} @@ -490,13 +790,21 @@ jobs: if: ${{ needs.setup.outputs.publish != 'true' }} with: image: ${{ env.DATAHUB_UPGRADE_IMAGE }}:${{ needs.setup.outputs.unique_tag }} - - name: Disable datahub-actions - run: | - yq -i 'del(.services.datahub-actions)' docker/quickstart/docker-compose-without-neo4j.quickstart.yml + - name: Download datahub-ingestion-slim image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.setup.outputs.unique_tag }} + - name: Disk Check + run: df -h . && docker images - name: run quickstart env: DATAHUB_TELEMETRY_ENABLED: false DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }} + DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }} + ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag }} + ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor] acryl-datahub-actions' + ACTIONS_CONFIG: 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml' run: | ./smoke-test/run-quickstart.sh - name: sleep 60s @@ -518,6 +826,8 @@ jobs: }' - name: Remove Source Code run: find ./*/* ! -path "./metadata-ingestion*" ! -path "./smoke-test*" ! -path "./gradle*" -delete + - name: Disk Check + run: df -h . && docker images - name: Smoke test env: RUN_QUICKSTART: false @@ -528,11 +838,14 @@ jobs: run: | echo "$DATAHUB_VERSION" ./smoke-test/smoke.sh + - name: Disk Check + run: df -h . && docker images - name: store logs if: failure() run: | docker ps -a docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log + docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log - name: Upload logs uses: actions/upload-artifact@v3 if: failure() diff --git a/build.gradle b/build.gradle index 4bb88b174eb34..ff1b8fa026925 100644 --- a/build.gradle +++ b/build.gradle @@ -38,7 +38,7 @@ buildscript { plugins { id 'com.gorylenko.gradle-git-properties' version '2.4.0-rc2' id 'com.github.johnrengelman.shadow' version '6.1.0' - id "com.palantir.docker" version "0.34.0" + id "com.palantir.docker" version "0.35.0" // https://blog.ltgt.net/javax-jakarta-mess-and-gradle-solution/ // TODO id "org.gradlex.java-ecosystem-capabilities" version "1.0" } diff --git a/datahub-frontend/build.gradle b/datahub-frontend/build.gradle index f21d10d8f3842..fda33e4a9a3c6 100644 --- a/datahub-frontend/build.gradle +++ b/datahub-frontend/build.gradle @@ -79,6 +79,8 @@ docker { files fileTree(rootProject.projectDir) { include 'docker/monitoring/*' include "docker/${docker_dir}/*" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -98,7 +100,7 @@ tasks.getByName("docker").dependsOn(unversionZip) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index ad2bf02bfdcc7..78d9f6a09948d 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -89,6 +89,8 @@ docker { files fileTree(rootProject.projectDir) { include "docker/${docker_repo}/*" include 'metadata-models/src/main/resources/*' + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -101,7 +103,7 @@ tasks.getByName("docker").dependsOn([bootJar]) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/docker/build.gradle b/docker/build.gradle index f33e06f383240..829bc344411f3 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -35,8 +35,31 @@ task quickstart(type: Exec, dependsOn: ':metadata-ingestion:install') { environment "DATAHUB_TELEMETRY_ENABLED", "false" environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}" - environment "ACTIONS_VERSION", 'alpine3.17-slim' - environment "DATAHUB_ACTIONS_IMAGE", 'nginx' + // environment "ACTIONS_VERSION", 'alpine3.17-slim' + // environment "DATAHUB_ACTIONS_IMAGE", 'nginx' + + def cmd = [ + 'source ../metadata-ingestion/venv/bin/activate && ', + 'datahub docker quickstart', + '--no-pull-images', + '--standalone_consumers', + '--version', "v${version}", + '--dump-logs-on-failure' + ] + + commandLine 'bash', '-c', cmd.join(" ") +} + +task quickstartSlim(type: Exec, dependsOn: ':metadata-ingestion:install') { + dependsOn(([':docker:datahub-ingestion'] + quickstart_modules).collect { it + ':dockerTag' }) + shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke' + + environment "DATAHUB_TELEMETRY_ENABLED", "false" + environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}" + environment "DATAHUB_ACTIONS_IMAGE", "acryldata/datahub-ingestion" + environment "ACTIONS_VERSION", "v${version}-slim" + environment "ACTIONS_EXTRA_PACKAGES", 'acryl-datahub-actions[executor] acryl-datahub-actions' + environment "ACTIONS_CONFIG", 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml' def cmd = [ 'source ../metadata-ingestion/venv/bin/activate && ', diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 9893d44caf460..bb4b0bc42e167 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -1,3 +1,6 @@ +ARG APP_ENV=full +ARG BASE_IMAGE=base + FROM golang:1-alpine3.17 AS binary ENV DOCKERIZE_VERSION v0.6.1 @@ -16,9 +19,7 @@ ENV CONFLUENT_KAFKA_VERSION=1.6.1 ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update && apt-get install -y \ - && apt-get install -y -qq \ - # gcc \ +RUN apt-get update && apt-get install -y -qq \ make \ python3-ldap \ libldap2-dev \ @@ -31,15 +32,34 @@ RUN apt-get update && apt-get install -y \ zip \ unzip \ ldap-utils \ - openjdk-11-jre-headless \ - && python -m pip install --upgrade pip wheel setuptools==57.5.0 \ - && curl -Lk -o /root/librdkafka-${LIBRDKAFKA_VERSION}.tar.gz https://github.com/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz \ - && tar -xzf /root/librdkafka-${LIBRDKAFKA_VERSION}.tar.gz -C /root \ - && cd /root/librdkafka-${LIBRDKAFKA_VERSION} \ - && ./configure --prefix /usr && make && make install && make clean && ./configure --clean \ - && apt-get remove -y make + && python -m pip install --no-cache --upgrade pip wheel setuptools \ + && wget -q https://github.com/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz -O - | \ + tar -xz -C /root \ + && cd /root/librdkafka-${LIBRDKAFKA_VERSION} \ + && ./configure --prefix /usr && make && make install && cd .. && rm -rf /root/librdkafka-${LIBRDKAFKA_VERSION} \ + && apt-get remove -y make \ + && rm -rf /var/lib/apt/lists/* /var/cache/apk/* + +# compiled against newer golang for security fixes COPY --from=binary /go/bin/dockerize /usr/local/bin +COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt +COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh + +RUN pip install --no-cache -r requirements.txt && \ + pip uninstall -y acryl-datahub && \ + chmod +x /entrypoint.sh && \ + addgroup --gid 1000 datahub && \ + adduser --disabled-password --uid 1000 --gid 1000 --home /datahub-ingestion datahub + +ENTRYPOINT [ "/entrypoint.sh" ] + +FROM ${BASE_IMAGE} as full-install + +RUN apt-get update && apt-get install -y -qq \ + default-jre-headless \ + && rm -rf /var/lib/apt/lists/* /var/cache/apk/* + RUN if [ $(arch) = "x86_64" ]; then \ mkdir /opt/oracle && \ cd /opt/oracle && \ @@ -58,7 +78,10 @@ RUN if [ $(arch) = "x86_64" ]; then \ ldconfig; \ fi; -COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt +FROM ${BASE_IMAGE} as slim-install +# Do nothing else on top of base + +FROM ${APP_ENV}-install -RUN pip install -r requirements.txt && \ - pip uninstall -y acryl-datahub +USER datahub +ENV PATH="/datahub-ingestion/.local/bin:$PATH" diff --git a/docker/datahub-ingestion-base/base-requirements.txt b/docker/datahub-ingestion-base/base-requirements.txt index 3d9e0777e5ce0..82d9a93a9a2c3 100644 --- a/docker/datahub-ingestion-base/base-requirements.txt +++ b/docker/datahub-ingestion-base/base-requirements.txt @@ -1,3 +1,7 @@ +# Excluded for slim +# pyspark==3.0.3 +# pydeequ==1.0.1 + acryl-datahub-classify==0.0.6 acryl-iceberg-legacy==0.0.4 acryl-PyHive==0.6.13 @@ -253,7 +257,6 @@ pycryptodome==3.18.0 pycryptodomex==3.18.0 pydantic==1.10.8 pydash==7.0.3 -pydeequ==1.0.1 pydruid==0.6.5 Pygments==2.15.1 pymongo==4.3.3 @@ -261,7 +264,6 @@ PyMySQL==1.0.3 pyOpenSSL==22.0.0 pyparsing==3.0.9 pyrsistent==0.19.3 -pyspark==3.0.3 pyspnego==0.9.0 python-daemon==3.0.1 python-dateutil==2.8.2 diff --git a/docker/datahub-ingestion-base/build.gradle b/docker/datahub-ingestion-base/build.gradle index fe3c12a59886f..10cd2ee71cce3 100644 --- a/docker/datahub-ingestion-base/build.gradle +++ b/docker/datahub-ingestion-base/build.gradle @@ -12,14 +12,17 @@ ext { } docker { - name "${docker_registry}/${docker_repo}:v${version}" - version "v${version}" + name "${docker_registry}/${docker_repo}:v${version}-slim" + version "v${version}-slim" dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } + buildArgs([APP_ENV: 'slim']) } -tasks.getByPath('docker').dependsOn('build') +tasks.getByName('docker').dependsOn('build') task mkdirBuildDocker { doFirst { @@ -27,10 +30,11 @@ task mkdirBuildDocker { } } dockerClean.finalizedBy(mkdirBuildDocker) +dockerClean.dependsOn([':docker:datahub-ingestion:dockerClean']) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/docker/datahub-ingestion-base/entrypoint.sh b/docker/datahub-ingestion-base/entrypoint.sh new file mode 100644 index 0000000000000..518bb21561467 --- /dev/null +++ b/docker/datahub-ingestion-base/entrypoint.sh @@ -0,0 +1,14 @@ +#!/usr/bin/bash + +if [ ! -z "$ACTIONS_EXTRA_PACKAGES" ]; then + pip install --user $ACTIONS_EXTRA_PACKAGES +fi + +if [[ ! -z "$ACTIONS_CONFIG" && ! -z "$ACTIONS_EXTRA_PACKAGES" ]]; then + mkdir -p /tmp/datahub/logs + curl -q "$ACTIONS_CONFIG" -o config.yaml + exec dockerize -wait ${DATAHUB_GMS_PROTOCOL:-http}://$DATAHUB_GMS_HOST:$DATAHUB_GMS_PORT/health -timeout 240s \ + datahub actions --config config.yaml +else + exec datahub $@ +fi diff --git a/docker/datahub-ingestion-slim/Dockerfile b/docker/datahub-ingestion-slim/Dockerfile deleted file mode 100644 index 580dcc4277124..0000000000000 --- a/docker/datahub-ingestion-slim/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -# Defining environment -ARG APP_ENV=prod -ARG DOCKER_VERSION=latest - -FROM acryldata/datahub-ingestion:$DOCKER_VERSION as base - -USER 0 -RUN pip uninstall -y pyspark -USER datahub diff --git a/docker/datahub-ingestion-slim/build.gradle b/docker/datahub-ingestion-slim/build.gradle deleted file mode 100644 index f21b66b576a0c..0000000000000 --- a/docker/datahub-ingestion-slim/build.gradle +++ /dev/null @@ -1,39 +0,0 @@ -plugins { - id 'com.palantir.docker' - id 'java' // required for versioning -} - -apply from: "../../gradle/versioning/versioning.gradle" - -ext { - docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry - docker_repo = 'datahub-ingestion-slim' - docker_dir = 'datahub-ingestion-slim' -} - -docker { - name "${docker_registry}/${docker_repo}:v${version}" - version "v${version}" - dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") - files fileTree(rootProject.projectDir) { - include "docker/${docker_dir}/*" - } - buildArgs([DOCKER_VERSION: version]) - - buildx(false) -} -tasks.getByPath('docker').dependsOn(['build', ':docker:datahub-ingestion:docker']) - -task mkdirBuildDocker { - doFirst { - mkdir "${project.buildDir}/docker" - } -} -dockerClean.finalizedBy(mkdirBuildDocker) - -task cleanLocalDockerImages { - doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) - } -} -dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index 45a98efb7f6fb..d16caea2fcecd 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -1,42 +1,27 @@ # Defining environment -ARG APP_ENV=prod +ARG APP_ENV=full +ARG BASE_IMAGE=acryldata/datahub-ingestion-base ARG DOCKER_VERSION=latest -FROM acryldata/datahub-ingestion-base:$DOCKER_VERSION as base - -FROM eclipse-temurin:11 as prod-build -COPY . /datahub-src -WORKDIR /datahub-src -# We noticed that the gradle wrapper download failed frequently on in CI on arm64 machines. -# I suspect this was due because of the QEMU emulation slowdown, combined with the arm64 -# build being starved for CPU by the x86_64 build's codegen step. -# -# The middle step will attempt to download gradle wrapper 5 times with exponential backoff. -# The ./gradlew --version will force the download of the gradle wrapper but is otherwise a no-op. -# Note that the retry logic will always return success, so we should always attempt to run codegen. -# Inspired by https://github.com/gradle/gradle/issues/18124#issuecomment-958182335. -# and https://unix.stackexchange.com/a/82610/378179. -# This is a workaround for https://github.com/gradle/gradle/issues/18124. -RUN (for attempt in 1 2 3 4 5; do ./gradlew --version && break ; echo "Failed to download gradle wrapper (attempt $attempt)" && sleep $((2<<$attempt)) ; done ) && \ - ./gradlew :metadata-events:mxe-schemas:build - -FROM base as prod-codegen -COPY --from=prod-build /datahub-src /datahub-src -RUN cd /datahub-src/metadata-ingestion && \ - pip install -e ".[base]" && \ - ./scripts/codegen.sh - -FROM base as prod-install -COPY --from=prod-codegen /datahub-src/metadata-ingestion /datahub-ingestion -COPY --from=prod-codegen /root/.cache/pip /root/.cache/pip +FROM $BASE_IMAGE:$DOCKER_VERSION as base +USER 0 + +COPY ./metadata-ingestion /datahub-ingestion + ARG RELEASE_VERSION -RUN cd /datahub-ingestion && \ - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ +WORKDIR /datahub-ingestion +RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ cat src/datahub/__init__.py && \ - pip install ".[all]" && \ - pip freeze && \ - # This is required to fix security vulnerability in htrace-core4 - rm -f /usr/local/lib/python3.10/site-packages/pyspark/jars/htrace-core4-4.1.0-incubating.jar + chown -R datahub /datahub-ingestion + +USER datahub +ENV PATH="/datahub-ingestion/.local/bin:$PATH" + +FROM base as slim-install +RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" + +FROM base as full-install +RUN pip install --no-cache --user ".[all]" FROM base as dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. @@ -44,7 +29,5 @@ FROM base as dev-install FROM ${APP_ENV}-install as final -RUN addgroup --system datahub && adduser --system datahub --ingroup datahub USER datahub - -ENTRYPOINT [ "datahub" ] +ENV PATH="/datahub-ingestion/.local/bin:$PATH" \ No newline at end of file diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle index 7a24d87794c0e..22531c0c4fd0e 100644 --- a/docker/datahub-ingestion/build.gradle +++ b/docker/datahub-ingestion/build.gradle @@ -11,24 +11,30 @@ ext { docker_dir = 'datahub-ingestion' } +dependencies { + project(':docker:datahub-ingestion-base') + project(':metadata-ingestion') +} + docker { - name "${docker_registry}/${docker_repo}:v${version}" - version "v${version}" + name "${docker_registry}/${docker_repo}:v${version}-slim" + version "v${version}-slim" dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" include "metadata-ingestion/**" - include "metadata-events/**" - include "metadata-models/**" - include "li-utils/**" - include "docs/**" - include "gradle/**" - include "buildSrc/**" - include "*" + }.exclude { + i -> i.file.isHidden() || + i.file == buildDir || + i.file == project(':metadata-ingestion').buildDir } - buildArgs([DOCKER_VERSION: version]) + buildArgs([DOCKER_VERSION: version, + RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace('-slim', ''), + APP_ENV: 'slim']) } -tasks.getByPath('docker').dependsOn(['build', ':docker:datahub-ingestion-base:docker']) +tasks.getByName('docker').dependsOn(['build', + ':docker:datahub-ingestion-base:docker', + ':metadata-ingestion:codegen']) task mkdirBuildDocker { doFirst { @@ -39,7 +45,7 @@ dockerClean.finalizedBy(mkdirBuildDocker) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml index 5ea364dd31ca7..08f8cc1ec9c45 100644 --- a/docker/docker-compose-with-cassandra.yml +++ b/docker/docker-compose-with-cassandra.yml @@ -26,6 +26,9 @@ services: hostname: actions image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} env_file: datahub-actions/env/docker.env + environment: + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} depends_on: datahub-gms: condition: service_healthy diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index 10b3f3c0eca5e..a755eda21cbf5 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -27,6 +27,9 @@ services: hostname: actions image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} env_file: datahub-actions/env/docker.env + environment: + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} depends_on: datahub-gms: condition: service_healthy diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 9228c11446ddf..d07ea5fa88f8b 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -26,6 +26,9 @@ services: hostname: actions image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} env_file: datahub-actions/env/docker.env + environment: + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} depends_on: datahub-gms: condition: service_healthy diff --git a/docker/elasticsearch-setup/build.gradle b/docker/elasticsearch-setup/build.gradle index cc2fe1ec5c4db..ffee3b9c65cf4 100644 --- a/docker/elasticsearch-setup/build.gradle +++ b/docker/elasticsearch-setup/build.gradle @@ -17,6 +17,8 @@ docker { files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" include "metadata-service/restli-servlet-impl/src/main/resources/index/**" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -25,7 +27,7 @@ docker { load(true) push(false) } -tasks.getByPath('docker').dependsOn('build') +tasks.getByName('docker').dependsOn('build') task mkdirBuildDocker { doFirst { @@ -36,7 +38,7 @@ dockerClean.finalizedBy(mkdirBuildDocker) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/docker/kafka-setup/build.gradle b/docker/kafka-setup/build.gradle index a5d33457e45f7..573ef21c88bf9 100644 --- a/docker/kafka-setup/build.gradle +++ b/docker/kafka-setup/build.gradle @@ -16,6 +16,8 @@ docker { dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -24,7 +26,7 @@ docker { load(true) push(false) } -tasks.getByPath('docker').dependsOn('build') +tasks.getByName('docker').dependsOn('build') task mkdirBuildDocker { doFirst { @@ -35,7 +37,7 @@ dockerClean.finalizedBy(mkdirBuildDocker) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/docker/mysql-setup/build.gradle b/docker/mysql-setup/build.gradle index 48a28f15a581d..0d8941cce4833 100644 --- a/docker/mysql-setup/build.gradle +++ b/docker/mysql-setup/build.gradle @@ -17,6 +17,8 @@ docker { dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -25,7 +27,7 @@ docker { load(true) push(false) } -tasks.getByPath('docker').dependsOn('build') +tasks.getByName('docker').dependsOn('build') task mkdirBuildDocker { doFirst { @@ -36,7 +38,7 @@ dockerClean.finalizedBy(mkdirBuildDocker) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}") + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/docker/postgres-setup/build.gradle b/docker/postgres-setup/build.gradle index a5b0413ec4be8..8a026be09d2b4 100644 --- a/docker/postgres-setup/build.gradle +++ b/docker/postgres-setup/build.gradle @@ -17,6 +17,8 @@ docker { dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -25,7 +27,7 @@ docker { load(true) push(false) } -tasks.getByPath('docker').dependsOn('build') +tasks.getByName('docker').dependsOn('build') task mkdirBuildDocker { doFirst { @@ -36,7 +38,7 @@ dockerClean.finalizedBy(mkdirBuildDocker) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}") + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index 5a8edd6eacf19..38418bc8c41b9 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -34,6 +34,8 @@ services: datahub-gms: condition: service_healthy environment: + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} - DATAHUB_GMS_HOST=datahub-gms - DATAHUB_GMS_PORT=8080 - DATAHUB_GMS_PROTOCOL=http diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index 6d51f2efcfcf2..cf879faa6a3f0 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -34,6 +34,8 @@ services: datahub-gms: condition: service_healthy environment: + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} - DATAHUB_GMS_HOST=datahub-gms - DATAHUB_GMS_PORT=8080 - DATAHUB_GMS_PROTOCOL=http diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 48f2d797bd8a4..007830078d2b4 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -34,6 +34,8 @@ services: datahub-gms: condition: service_healthy environment: + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} - DATAHUB_GMS_HOST=datahub-gms - DATAHUB_GMS_PORT=8080 - DATAHUB_GMS_PROTOCOL=http diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index bd30c359a2a76..390543b92123f 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -34,6 +34,8 @@ services: datahub-gms: condition: service_healthy environment: + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} - DATAHUB_GMS_HOST=datahub-gms - DATAHUB_GMS_PORT=8080 - DATAHUB_GMS_PROTOCOL=http diff --git a/gradle/docker/docker.gradle b/gradle/docker/docker.gradle index f0bb4a5500b33..db2979a8ff6dc 100644 --- a/gradle/docker/docker.gradle +++ b/gradle/docker/docker.gradle @@ -21,6 +21,7 @@ ext.getDockerContainers = { ext.cleanLocalDockerImages = { String docker_registry, String docker_repo, String docker_tag -> + println("Docker image string: ${docker_registry}/${docker_repo}:${docker_tag}") def containers = getDockerContainers(docker_registry, docker_repo, docker_tag) if(!containers.isEmpty()) { println "Stopping containers: $containers" @@ -35,6 +36,7 @@ ext.cleanLocalDockerImages = { if(!images.isEmpty()) { println "Removing images: $images" exec { + ignoreExitValue true // may not work if used by downstream image commandLine = ["docker", "rmi", "-f"] + images } } diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index 918f610ce4635..9fde47c82873c 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -893,6 +893,7 @@ def download_compose_files( tmp_file.write(quickstart_download_response.content) logger.debug(f"Copied to {path}") if kafka_setup: + base_url = get_docker_compose_base_url(compose_git_ref) kafka_setup_github_file = f"{base_url}/{KAFKA_SETUP_QUICKSTART_COMPOSE_FILE}" default_kafka_compose_file = ( diff --git a/metadata-jobs/mae-consumer-job/build.gradle b/metadata-jobs/mae-consumer-job/build.gradle index e7941a04224e3..3811a9537ac24 100644 --- a/metadata-jobs/mae-consumer-job/build.gradle +++ b/metadata-jobs/mae-consumer-job/build.gradle @@ -43,6 +43,8 @@ docker { include 'docker/monitoring/*' include "docker/${docker_repo}/*" include 'metadata-models/src/main/resources/*' + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -55,7 +57,7 @@ tasks.getByName("docker").dependsOn([bootJar]) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/metadata-jobs/mce-consumer-job/build.gradle b/metadata-jobs/mce-consumer-job/build.gradle index 5981284e9da3f..2229c387f3676 100644 --- a/metadata-jobs/mce-consumer-job/build.gradle +++ b/metadata-jobs/mce-consumer-job/build.gradle @@ -56,6 +56,8 @@ docker { include 'docker/monitoring/*' include "docker/${docker_repo}/*" include 'metadata-models/src/main/resources/*' + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -68,7 +70,7 @@ tasks.getByName("docker").dependsOn([bootJar]) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/metadata-service/war/build.gradle b/metadata-service/war/build.gradle index 7e9aa90664611..eaf14f7fd6c18 100644 --- a/metadata-service/war/build.gradle +++ b/metadata-service/war/build.gradle @@ -72,6 +72,8 @@ docker { include 'docker/monitoring/*' include "docker/${docker_repo}/*" include 'metadata-models/src/main/resources/*' + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -84,7 +86,7 @@ tasks.getByName("docker").dependsOn([build, war]) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}") + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js index ddda8626fba2f..24a24cc21138d 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js @@ -31,8 +31,7 @@ describe("run managed ingestion", () => { cy.waitTextVisible(testName) cy.contains(testName).parent().within(() => { - // TODO: Skipping until disk size resolved - // cy.contains("Succeeded", {timeout: 30000}) + cy.contains("Succeeded", {timeout: 180000}) cy.clickOptionWithTestId("delete-button"); }) cy.clickOptionWithText("Yes")