diff --git a/.github/actions/docker-custom-build-and-push/action.yml b/.github/actions/docker-custom-build-and-push/action.yml index 1e0054503c17d..c401502cc508f 100644 --- a/.github/actions/docker-custom-build-and-push/action.yml +++ b/.github/actions/docker-custom-build-and-push/action.yml @@ -30,6 +30,9 @@ inputs: # e.g. latest,head,sha12345 description: "List of tags to use for the Docker image" required: true + target: + description: "Sets the target stage to build" + required: false outputs: image_tag: description: "Docker image tags" @@ -62,6 +65,7 @@ runs: platforms: linux/amd64 build-args: ${{ inputs.build-args }} tags: ${{ steps.docker_meta.outputs.tags }} + target: ${{ inputs.target }} load: true push: false - name: Upload image locally for testing (if not publishing) @@ -92,6 +96,7 @@ runs: platforms: ${{ inputs.platforms }} build-args: ${{ inputs.build-args }} tags: ${{ steps.docker_meta.outputs.tags }} + target: ${{ inputs.target }} push: true # TODO add code for vuln scanning? diff --git a/.github/workflows/docker-ingestion-base.yml b/.github/workflows/docker-ingestion-base.yml index 9385311f99595..e69de29bb2d1d 100644 --- a/.github/workflows/docker-ingestion-base.yml +++ b/.github/workflows/docker-ingestion-base.yml @@ -1,45 +0,0 @@ -name: ingestion base -on: - release: - types: [published] - push: - branches: - - master - paths: - - ".github/workflows/docker-ingestion-base.yml" - - "docker/datahub-ingestion-base/**" - - "gradle*" - pull_request: - branches: - - master - paths: - - ".github/workflows/docker-ingestion-base.yml" - - "docker/datahub-ingestion-base/**" - - "gradle*" - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - build-base: - name: Build and Push Docker Image to Docker Hub - runs-on: ubuntu-latest - steps: - - name: Check out the repo - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - name: Build and Push image - uses: ./.github/actions/docker-custom-build-and-push - with: - images: | - heruko/datahub-ingestion-base - tags: latest - username: ${{ secrets.ACRYL_DOCKER_USERNAME }} - password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - publish: ${{ github.ref == 'refs/heads/master' }} - context: . - file: ./docker/datahub-ingestion-base/Dockerfile - platforms: linux/amd64,linux/arm64/v8 diff --git a/.github/workflows/docker-ingestion.yml b/.github/workflows/docker-ingestion.yml deleted file mode 100644 index a0a6fb4768a44..0000000000000 --- a/.github/workflows/docker-ingestion.yml +++ /dev/null @@ -1,118 +0,0 @@ -name: datahub-ingestion docker -on: - push: - branches: - - master - paths-ignore: - - "docs/**" - - "**.md" - pull_request: - branches: - - master - paths: - - "metadata-ingestion/**" - - "metadata-models/**" - - "docker/datahub-ingestion/**" - - "docker/datahub-ingestion-slim/**" - - ".github/workflows/docker-ingestion.yml" - release: - types: [published] - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - setup: - runs-on: ubuntu-latest - outputs: - tag: ${{ steps.tag.outputs.tag }} - publish: ${{ steps.publish.outputs.publish }} - python_release_version: ${{ steps.python_release_version.outputs.release_version }} - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Compute Tag - id: tag - run: | - source .github/scripts/docker_helpers.sh - echo "tag=$(get_tag)" >> $GITHUB_OUTPUT - - name: Compute Python Release Version - id: python_release_version - run: | - source .github/scripts/docker_helpers.sh - echo "release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT - - name: Check whether publishing enabled - id: publish - env: - ENABLE_PUBLISH: ${{ secrets.DOCKER_PASSWORD }} - run: | - echo "Enable publish: ${{ env.ENABLE_PUBLISH != '' }}" - echo "publish=${{ env.ENABLE_PUBLISH != '' }}" >> $GITHUB_OUTPUT - push_to_registries: - name: Build and Push Docker Image to Docker Hub - runs-on: ubuntu-latest - needs: setup - steps: - - name: Check out the repo - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - name: Build and push - uses: ./.github/actions/docker-custom-build-and-push - with: - images: | - heruko/datahub-ingestion - tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - publish: ${{ needs.setup.outputs.publish == 'true' }} - context: . - file: ./docker/datahub-ingestion/Dockerfile - platforms: linux/amd64,linux/arm64/v8 - build-args: | - RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} - - name: Build and Push image (slim) - uses: ./.github/actions/docker-custom-build-and-push - with: - images: | - heruko/datahub-ingestion-slim - tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.ACRYL_DOCKER_USERNAME }} - password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - publish: ${{ needs.setup.outputs.publish == 'true' }} - context: . - file: ./docker/datahub-ingestion-slim/Dockerfile - platforms: linux/amd64,linux/arm64/v8 - ingestion-slim_scan: - permissions: - contents: read # for actions/checkout to fetch code - security-events: write # for github/codeql-action/upload-sarif to upload SARIF results - actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status - name: "[Monitoring] Scan datahub-ingestion-slim images for vulnerabilities" - if: ${{ github.ref == 'refs/heads/master' }} - runs-on: ubuntu-latest - needs: [push_to_registries] - steps: - - name: Checkout # adding checkout step just to make trivy upload happy - uses: actions/checkout@v3 - - name: Download image - uses: ishworkh/docker-image-artifact-download@v1 - with: - image: heruko/datahub-ingestion-slim:latest - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@0.8.0 - env: - TRIVY_OFFLINE_SCAN: true - with: - image-ref: heruko/datahub-ingestion-slim:latest - format: "template" - template: "@/contrib/sarif.tpl" - output: "trivy-results.sarif" - severity: "CRITICAL,HIGH" - ignore-unfixed: true - vuln-type: "os,library" - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 - with: - sarif_file: "trivy-results.sarif" diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index f37835d14e4c8..4dd8a2a9b9193 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -32,13 +32,19 @@ env: DATAHUB_MYSQL_SETUP_IMAGE: 'heruko/datahub-mysql-setup' DATAHUB_UPGRADE_IMAGE: 'heruko/datahub-upgrade' INGEST_API_IMAGE: 'heruko/ingest-api' + DATAHUB_INGESTION_BASE_IMAGE: "heruko/datahub-ingestion-base" + DATAHUB_INGESTION_IMAGE: "heruko/datahub-ingestion" jobs: setup: runs-on: ubuntu-latest outputs: tag: ${{ steps.tag.outputs.tag }} + slim_tag: ${{ steps.tag.outputs.slim_tag }} + full_tag: ${{ steps.tag.outputs.full_tag }} unique_tag: ${{ steps.tag.outputs.unique_tag }} + unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }} + unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }} publish: ${{ steps.publish.outputs.publish }} steps: - name: Checkout @@ -48,11 +54,15 @@ jobs: run: | source .github/scripts/docker_helpers.sh echo "tag=$(get_tag)" >> $GITHUB_OUTPUT + echo "slim_tag=$(get_tag)-slim" >> $GITHUB_OUTPUT + echo "full_tag=$(get_tag)-full" >> $GITHUB_OUTPUT echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT + echo "unique_slim_tag=$(get_unique_tag)-slim" >> $GITHUB_OUTPUT + echo "unique_full_tag=$(get_unique_tag)-full" >> $GITHUB_OUTPUT - name: Check whether publishing enabled id: publish env: - ENABLE_PUBLISH: ${{ secrets.DOCKER_PASSWORD }} + ENABLE_PUBLISH: ${{ secrets.DOCKER_PASSWORD != '' && secrets.ACRYL_DOCKER_PASSWORD != '' }} run: | echo "Enable publish: ${{ env.ENABLE_PUBLISH != '' }}" echo "publish=${{ env.ENABLE_PUBLISH != '' }}" >> $GITHUB_OUTPUT @@ -65,7 +75,7 @@ jobs: - name: Check out the repo uses: actions/checkout@v3 with: - fetch-depth: 0 + fetch-depth: 800 - name: Pre-build artifacts for docker image run: | ./gradlew :metadata-service:war:build -x test --parallel @@ -123,7 +133,7 @@ jobs: - name: Check out the repo uses: actions/checkout@v3 with: - fetch-depth: 0 + fetch-depth: 800 - name: Pre-build artifacts for docker image run: | ./gradlew :metadata-jobs:mae-consumer-job:build -x test --parallel @@ -181,7 +191,7 @@ jobs: - name: Check out the repo uses: actions/checkout@v3 with: - fetch-depth: 0 + fetch-depth: 800 - name: Pre-build artifacts for docker image run: | ./gradlew :metadata-jobs:mce-consumer-job:build -x test --parallel @@ -239,7 +249,7 @@ jobs: - name: Check out the repo uses: actions/checkout@v3 with: - fetch-depth: 0 + fetch-depth: 800 - name: Pre-build artifacts for docker image run: | ./gradlew :datahub-upgrade:build -x test --parallel @@ -297,7 +307,7 @@ jobs: - name: Check out the repo uses: actions/checkout@v3 with: - fetch-depth: 0 + fetch-depth: 800 - name: Pre-build artifacts for docker image run: | ./gradlew :datahub-frontend:dist -x test -x yarnTest -x yarnLint --parallel @@ -357,7 +367,7 @@ jobs: - name: Check out the repo uses: actions/checkout@v3 with: - fetch-depth: 0 + fetch-depth: 800 - name: Build and push uses: ./.github/actions/docker-custom-build-and-push with: @@ -379,7 +389,7 @@ jobs: - name: Check out the repo uses: actions/checkout@v3 with: - fetch-depth: 0 + fetch-depth: 800 - name: Build and push uses: ./.github/actions/docker-custom-build-and-push with: @@ -401,7 +411,7 @@ jobs: - name: Check out the repo uses: actions/checkout@v3 with: - fetch-depth: 0 + fetch-depth: 800 - name: Build and push uses: ./.github/actions/docker-custom-build-and-push with: @@ -415,6 +425,291 @@ jobs: file: ./docker/elasticsearch-setup/Dockerfile platforms: linux/amd64,linux/arm64/v8 + datahub_ingestion_base_build: + name: Build and Push DataHub Ingestion (Base) Docker Image + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.tag.outputs.tag }} + needs: setup + steps: + - name: Check out the repo + uses: actions/checkout@v3 + with: + fetch-depth: 800 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + datahub-ingestion-base: + - 'docker/datahub-ingestion-base/**' + - name: Build and push Base Image + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' }} + uses: ./.github/actions/docker-custom-build-and-push + with: + target: base + images: | + ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} + tags: ${{ needs.setup.outputs.tag }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + build-args: | + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + publish: ${{ needs.setup.outputs.publish }} + context: . + file: ./docker/datahub-ingestion-base/Dockerfile + platforms: linux/amd64,linux/arm64/v8 + - name: Compute DataHub Ingestion (Base) Tag + id: tag + run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }}" >> $GITHUB_OUTPUT + datahub_ingestion_base_slim_build: + name: Build and Push DataHub Ingestion (Base-Slim) Docker Image + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.tag.outputs.tag }} + needs: [setup, datahub_ingestion_base_build] + steps: + - name: Check out the repo + uses: actions/checkout@v3 + with: + fetch-depth: 800 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + datahub-ingestion-base: + - 'docker/datahub-ingestion-base/**' + - name: Download Base Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + - name: Build and push Base-Slim Image + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' }} + uses: ./.github/actions/docker-custom-build-and-push + with: + target: slim-install + images: | + ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} + tags: ${{ needs.setup.outputs.slim_tag }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + build-args: | + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + APP_ENV=slim + BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + publish: ${{ needs.setup.outputs.publish }} + context: . + file: ./docker/datahub-ingestion-base/Dockerfile + platforms: linux/amd64,linux/arm64/v8 + - name: Compute DataHub Ingestion (Base-Slim) Tag + id: tag + run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.slim_tag || 'head' }}" >> $GITHUB_OUTPUT + datahub_ingestion_base_full_build: + name: Build and Push DataHub Ingestion (Base-Full) Docker Image + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.tag.outputs.tag }} + needs: [setup, datahub_ingestion_base_build] + steps: + - name: Check out the repo + uses: actions/checkout@v3 + with: + fetch-depth: 800 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + datahub-ingestion-base: + - 'docker/datahub-ingestion-base/**' + - name: Download Base Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + - name: Build and push Base-Full Image + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' }} + uses: ./.github/actions/docker-custom-build-and-push + with: + target: full-install + images: | + ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} + tags: ${{ needs.setup.outputs.full_tag }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + build-args: | + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + APP_ENV=full + BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }} + publish: ${{ needs.setup.outputs.publish }} + context: . + file: ./docker/datahub-ingestion-base/Dockerfile + platforms: linux/amd64,linux/arm64/v8 + - name: Compute DataHub Ingestion (Base-Full) Tag + id: tag + run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.full_tag || 'head' }}" >> $GITHUB_OUTPUT + + + datahub_ingestion_slim_build: + name: Build and Push DataHub Ingestion Docker Images + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.tag.outputs.tag }} + needs: [setup, datahub_ingestion_base_slim_build] + steps: + - name: Check out the repo + uses: actions/checkout@v3 + with: + fetch-depth: 800 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + datahub-ingestion-base: + - 'docker/datahub-ingestion-base/**' + datahub-ingestion: + - 'docker/datahub-ingestion/**' + - name: Build codegen + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + run: ./gradlew :metadata-ingestion:codegen + - name: Download Base Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.slim_tag || 'head' }} + - name: Build and push Slim Image + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + uses: ./.github/actions/docker-custom-build-and-push + with: + target: final + images: | + ${{ env.DATAHUB_INGESTION_IMAGE }} + build-args: | + BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.slim_tag || 'head' }} + APP_ENV=slim + tags: ${{ needs.setup.outputs.slim_tag }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + publish: ${{ needs.setup.outputs.publish }} + context: . + file: ./docker/datahub-ingestion/Dockerfile + platforms: linux/amd64,linux/arm64/v8 + - name: Compute Tag + id: tag + run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.slim_tag || 'head' }}" >> $GITHUB_OUTPUT + datahub_ingestion_slim_scan: + permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + name: "[Monitoring] Scan Datahub Ingestion Slim images for vulnerabilities" + runs-on: ubuntu-latest + needs: [setup, datahub_ingestion_slim_build] + steps: + - name: Checkout # adding checkout step just to make trivy upload happy + uses: actions/checkout@v3 + - name: Download image Slim Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_slim_build.outputs.slim_tag }} + - name: Run Trivy vulnerability scanner Slim Image + uses: aquasecurity/trivy-action@0.8.0 + env: + TRIVY_OFFLINE_SCAN: true + with: + image-ref: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_slim_build.outputs.slim_tag }} + format: "template" + template: "@/contrib/sarif.tpl" + output: "trivy-results.sarif" + severity: "CRITICAL,HIGH" + ignore-unfixed: true + vuln-type: "os,library" + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v2 + with: + sarif_file: "trivy-results.sarif" + + datahub_ingestion_full_build: + name: Build and Push DataHub Ingestion (Full) Docker Images + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.tag.outputs.tag }} + needs: [setup, datahub_ingestion_base_full_build] + steps: + - name: Check out the repo + uses: actions/checkout@v3 + with: + fetch-depth: 800 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + datahub-ingestion-base: + - 'docker/datahub-ingestion-base/**' + datahub-ingestion: + - 'docker/datahub-ingestion/**' + - name: Build codegen + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + run: ./gradlew :metadata-ingestion:codegen + - name: Download Base Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.full_tag || 'head' }} + - name: Build and push Full Image + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + uses: ./.github/actions/docker-custom-build-and-push + with: + target: final + images: | + ${{ env.DATAHUB_INGESTION_IMAGE }} + build-args: | + BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.full_tag || 'head' }} + tags: ${{ needs.setup.outputs.full_tag }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + publish: ${{ needs.setup.outputs.publish }} + context: . + file: ./docker/datahub-ingestion/Dockerfile + platforms: linux/amd64,linux/arm64/v8 + - name: Compute Tag (Full) + id: tag + run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.full_tag || 'head' }}" >> $GITHUB_OUTPUT + datahub_ingestion_full_scan: + permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + name: "[Monitoring] Scan Datahub Ingestion images for vulnerabilities" + runs-on: ubuntu-latest + needs: [setup, datahub_ingestion_full_build] + steps: + - name: Checkout # adding checkout step just to make trivy upload happy + uses: actions/checkout@v3 + - name: Download image Full Image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_full_build.outputs.full_tag }} + - name: Run Trivy vulnerability scanner Full Image + uses: aquasecurity/trivy-action@0.8.0 + env: + TRIVY_OFFLINE_SCAN: true + with: + image-ref: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_full_build.outputs.full_tag }} + format: "template" + template: "@/contrib/sarif.tpl" + output: "trivy-results.sarif" + severity: "CRITICAL,HIGH" + ignore-unfixed: true + vuln-type: "os,library" + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v2 + with: + sarif_file: "trivy-results.sarif" + smoke_test: name: Run Smoke Tests runs-on: ubuntu-latest @@ -433,8 +728,11 @@ jobs: mae_consumer_build, mce_consumer_build, datahub_upgrade_build, + datahub_ingestion_slim_build, ] steps: + - name: Disk Check + run: df -h . && docker images - name: Check out the repo uses: actions/checkout@v3 - name: Set up JDK 11 @@ -451,6 +749,8 @@ jobs: - name: Build datahub cli run: | ./gradlew :metadata-ingestion:install + - name: Disk Check + run: df -h . && docker images - name: Download GMS image uses: ishworkh/docker-image-artifact-download@v1 if: ${{ needs.setup.outputs.publish != 'true' }} @@ -491,10 +791,21 @@ jobs: if: ${{ needs.setup.outputs.publish != 'true' }} with: image: ${{ env.DATAHUB_UPGRADE_IMAGE }}:${{ needs.setup.outputs.unique_tag }} + - name: Download datahub-ingestion-slim image + uses: ishworkh/docker-image-artifact-download@v1 + if: ${{ needs.setup.outputs.publish != 'true' }} + with: + image: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.setup.outputs.unique_tag }} + - name: Disk Check + run: df -h . && docker images - name: run quickstart env: DATAHUB_TELEMETRY_ENABLED: false DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }} + DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }} + ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag }} + ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor] acryl-datahub-actions' + ACTIONS_CONFIG: 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml' run: | ./smoke-test/run-quickstart.sh - name: sleep 60s @@ -502,6 +813,22 @@ jobs: # we are doing this because gms takes time to get ready # and we don't have a better readiness check when bootstrap is done sleep 60s + - name: Disable ES Disk Threshold + run: | + curl -XPUT "http://localhost:9200/_cluster/settings" \ + -H 'Content-Type: application/json' -d'{ + "persistent": { + "cluster": { + "routing": { + "allocation.disk.threshold_enabled": false + } + } + } + }' + - name: Remove Source Code + run: find ./*/* ! -path "./metadata-ingestion*" ! -path "./smoke-test*" ! -path "./gradle*" -delete + - name: Disk Check + run: df -h . && docker images - name: Smoke test env: RUN_QUICKSTART: false @@ -512,11 +839,14 @@ jobs: run: | echo "$DATAHUB_VERSION" ./smoke-test/smoke.sh + - name: Disk Check + run: df -h . && docker images - name: store logs if: failure() run: | docker ps -a docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log + docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log - name: Upload logs uses: actions/upload-artifact@v3 if: failure() diff --git a/build.gradle b/build.gradle index 2c707c4da2458..c2c41820633bf 100644 --- a/build.gradle +++ b/build.gradle @@ -39,7 +39,7 @@ buildscript { plugins { id 'com.gorylenko.gradle-git-properties' version '2.4.0-rc2' id 'com.github.johnrengelman.shadow' version '6.1.0' - id "com.palantir.docker" version "0.34.0" + id "com.palantir.docker" version "0.35.0" // https://blog.ltgt.net/javax-jakarta-mess-and-gradle-solution/ // TODO id "org.gradlex.java-ecosystem-capabilities" version "1.0" } diff --git a/datahub-frontend/build.gradle b/datahub-frontend/build.gradle index f21d10d8f3842..fda33e4a9a3c6 100644 --- a/datahub-frontend/build.gradle +++ b/datahub-frontend/build.gradle @@ -79,6 +79,8 @@ docker { files fileTree(rootProject.projectDir) { include 'docker/monitoring/*' include "docker/${docker_dir}/*" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -98,7 +100,7 @@ tasks.getByName("docker").dependsOn(unversionZip) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index cd4a3ebcdade1..0bd03d8c7d2cb 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -85,6 +85,8 @@ docker { files fileTree(rootProject.projectDir) { include "docker/${docker_repo}/*" include 'metadata-models/src/main/resources/*' + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -97,7 +99,7 @@ tasks.getByName("docker").dependsOn([bootJar]) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/docker/build.gradle b/docker/build.gradle index 16f7b46467cc6..366ae6a14c3dc 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -35,6 +35,31 @@ task quickstart(type: Exec, dependsOn: ':metadata-ingestion:install') { environment "DATAHUB_TELEMETRY_ENABLED", "false" environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}" + // environment "ACTIONS_VERSION", 'alpine3.17-slim' + // environment "DATAHUB_ACTIONS_IMAGE", 'nginx' + + def cmd = [ + 'source ../metadata-ingestion/venv/bin/activate && ', + 'datahub docker quickstart', + '--no-pull-images', + '--standalone_consumers', + '--version', "v${version}", + '--dump-logs-on-failure' + ] + + commandLine 'bash', '-c', cmd.join(" ") +} + +task quickstartSlim(type: Exec, dependsOn: ':metadata-ingestion:install') { + dependsOn(([':docker:datahub-ingestion'] + quickstart_modules).collect { it + ':dockerTag' }) + shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke' + + environment "DATAHUB_TELEMETRY_ENABLED", "false" + environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}" + environment "DATAHUB_ACTIONS_IMAGE", "heruko/datahub-ingestion" + environment "ACTIONS_VERSION", "v${version}-slim" + environment "ACTIONS_EXTRA_PACKAGES", 'acryl-datahub-actions[executor] acryl-datahub-actions' + environment "ACTIONS_CONFIG", 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml' def cmd = [ 'source ../metadata-ingestion/venv/bin/activate && ', diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 2468f2119bcfc..bb4b0bc42e167 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -1,3 +1,6 @@ +ARG APP_ENV=full +ARG BASE_IMAGE=base + FROM golang:1-alpine3.17 AS binary ENV DOCKERIZE_VERSION v0.6.1 @@ -16,9 +19,7 @@ ENV CONFLUENT_KAFKA_VERSION=1.6.1 ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update && apt-get install -y \ - && apt-get install -y -qq \ - # gcc \ +RUN apt-get update && apt-get install -y -qq \ make \ python3-ldap \ libldap2-dev \ @@ -31,15 +32,34 @@ RUN apt-get update && apt-get install -y \ zip \ unzip \ ldap-utils \ - openjdk-17-jre-headless \ - && python -m pip install --upgrade pip wheel setuptools==57.5.0 \ - && curl -Lk -o /root/librdkafka-${LIBRDKAFKA_VERSION}.tar.gz https://github.com/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz \ - && tar -xzf /root/librdkafka-${LIBRDKAFKA_VERSION}.tar.gz -C /root \ - && cd /root/librdkafka-${LIBRDKAFKA_VERSION} \ - && ./configure --prefix /usr && make && make install && make clean && ./configure --clean \ - && apt-get remove -y make + && python -m pip install --no-cache --upgrade pip wheel setuptools \ + && wget -q https://github.com/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz -O - | \ + tar -xz -C /root \ + && cd /root/librdkafka-${LIBRDKAFKA_VERSION} \ + && ./configure --prefix /usr && make && make install && cd .. && rm -rf /root/librdkafka-${LIBRDKAFKA_VERSION} \ + && apt-get remove -y make \ + && rm -rf /var/lib/apt/lists/* /var/cache/apk/* + +# compiled against newer golang for security fixes COPY --from=binary /go/bin/dockerize /usr/local/bin +COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt +COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh + +RUN pip install --no-cache -r requirements.txt && \ + pip uninstall -y acryl-datahub && \ + chmod +x /entrypoint.sh && \ + addgroup --gid 1000 datahub && \ + adduser --disabled-password --uid 1000 --gid 1000 --home /datahub-ingestion datahub + +ENTRYPOINT [ "/entrypoint.sh" ] + +FROM ${BASE_IMAGE} as full-install + +RUN apt-get update && apt-get install -y -qq \ + default-jre-headless \ + && rm -rf /var/lib/apt/lists/* /var/cache/apk/* + RUN if [ $(arch) = "x86_64" ]; then \ mkdir /opt/oracle && \ cd /opt/oracle && \ @@ -58,7 +78,10 @@ RUN if [ $(arch) = "x86_64" ]; then \ ldconfig; \ fi; -COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt +FROM ${BASE_IMAGE} as slim-install +# Do nothing else on top of base + +FROM ${APP_ENV}-install -RUN pip install -r requirements.txt && \ - pip uninstall -y acryl-datahub +USER datahub +ENV PATH="/datahub-ingestion/.local/bin:$PATH" diff --git a/docker/datahub-ingestion-base/base-requirements.txt b/docker/datahub-ingestion-base/base-requirements.txt index 537ad7fcf3e63..82d9a93a9a2c3 100644 --- a/docker/datahub-ingestion-base/base-requirements.txt +++ b/docker/datahub-ingestion-base/base-requirements.txt @@ -1,29 +1,34 @@ +# Excluded for slim +# pyspark==3.0.3 +# pydeequ==1.0.1 + acryl-datahub-classify==0.0.6 acryl-iceberg-legacy==0.0.4 acryl-PyHive==0.6.13 aenum==3.1.12 aiohttp==3.8.4 aiosignal==1.3.1 -alembic==1.10.3 +alembic==1.11.1 altair==4.2.0 -anyio==3.6.2 -apache-airflow==2.5.3 -apache-airflow-providers-common-sql==1.4.0 -apache-airflow-providers-ftp==3.3.1 -apache-airflow-providers-http==4.3.0 -apache-airflow-providers-imap==3.1.1 -apache-airflow-providers-sqlite==3.3.1 -apispec==3.3.2 +anyio==3.7.0 +apache-airflow==2.6.1 +apache-airflow-providers-common-sql==1.5.1 +apache-airflow-providers-ftp==3.4.1 +apache-airflow-providers-http==4.4.1 +apache-airflow-providers-imap==3.2.1 +apache-airflow-providers-sqlite==3.4.1 +apispec==5.2.2 appdirs==1.4.4 appnope==0.1.3 -argcomplete==3.0.5 +argcomplete==3.0.8 argon2-cffi==21.3.0 argon2-cffi-bindings==21.2.0 -asgiref==3.6.0 +asgiref==3.7.2 asn1crypto==1.5.1 asttokens==2.2.1 async-timeout==4.0.2 -attrs==22.2.0 +asynch==0.2.2 +attrs==23.1.0 avro==1.10.2 avro-gen3==0.7.10 azure-core==1.26.4 @@ -37,25 +42,27 @@ beautifulsoup4==4.12.2 bleach==6.0.0 blinker==1.6.2 blis==0.7.9 -boto3==1.26.113 -botocore==1.29.113 +boto3==1.26.142 +botocore==1.29.142 bowler==0.9.0 bracex==2.3.post1 cached-property==1.5.2 cachelib==0.9.0 -cachetools==5.3.0 +cachetools==5.3.1 catalogue==2.0.8 cattrs==22.2.0 -certifi==2022.12.7 +certifi==2023.5.7 cffi==1.15.1 chardet==5.1.0 charset-normalizer==2.1.1 +ciso8601==2.3.0 click==8.1.3 click-default-group==1.2.2 click-spinner==0.1.10 clickclick==20.10.2 -clickhouse-driver==0.2.5 -clickhouse-sqlalchemy==0.2.3 +clickhouse-cityhash==1.0.2.4 +clickhouse-driver==0.2.6 +clickhouse-sqlalchemy==0.2.4 cloudpickle==2.2.1 colorama==0.4.6 colorlog==4.8.0 @@ -63,23 +70,24 @@ confection==0.0.4 ConfigUpdater==3.1.1 confluent-kafka==1.8.2 connexion==2.14.2 -cron-descriptor==1.2.35 -croniter==1.3.14 +cron-descriptor==1.4.0 +croniter==1.3.15 cryptography==37.0.4 cx-Oracle==8.3.0 cymem==2.0.7 -dask==2023.3.2 -databricks-cli==0.17.3 +dask==2023.5.1 +databricks-cli==0.17.7 databricks-dbapi==0.6.0 +databricks-sdk==0.1.8 debugpy==1.6.7 decorator==5.1.1 defusedxml==0.7.1 -deltalake==0.8.1 -Deprecated==1.2.13 +deltalake==0.9.0 +Deprecated==1.2.14 dill==0.3.6 dnspython==2.3.0 -docker==6.0.1 -docutils==0.19 +docker==6.1.2 +docutils==0.20.1 ecdsa==0.18.0 elasticsearch==7.13.4 email-validator==1.3.1 @@ -88,56 +96,58 @@ et-xmlfile==1.1.0 exceptiongroup==1.1.1 executing==1.2.0 expandvars==0.9.0 -fastapi==0.95.1 -fastavro==1.7.3 -fastjsonschema==2.16.3 +fastapi==0.95.2 +fastavro==1.7.4 +fastjsonschema==2.17.1 feast==0.29.0 -filelock==3.11.0 +filelock==3.12.0 fissix==21.11.13 -Flask==2.2.3 +Flask==2.2.5 flatdict==4.0.1 frozenlist==1.3.3 -fsspec==2023.4.0 +fsspec==2023.5.0 future==0.18.3 -GeoAlchemy2==0.13.1 +GeoAlchemy2==0.13.3 gitdb==4.0.10 GitPython==3.1.31 google-api-core==2.11.0 -google-auth==2.17.3 +google-auth==2.19.0 google-cloud-appengine-logging==1.3.0 google-cloud-audit-log==0.2.5 -google-cloud-bigquery==3.9.0 +google-cloud-bigquery==3.10.0 google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 google-cloud-datacatalog-lineage==0.2.2 google-cloud-logging==3.5.0 google-crc32c==1.5.0 -google-resumable-media==2.4.1 +google-resumable-media==2.5.0 googleapis-common-protos==1.59.0 -gql==3.4.0 +gql==3.4.1 graphql-core==3.2.3 graphviz==0.20.1 great-expectations==0.15.50 greenlet==2.0.2 grpc-google-iam-v1==0.12.6 -grpcio==1.53.0 -grpcio-reflection==1.53.0 -grpcio-status==1.53.0 -grpcio-tools==1.53.0 +grpcio==1.54.2 +grpcio-reflection==1.54.2 +grpcio-status==1.54.2 +grpcio-tools==1.54.2 +gssapi==1.8.2 gunicorn==20.1.0 h11==0.14.0 hmsclient==0.1.1 -httpcore==0.17.0 +httpcore==0.17.2 httptools==0.5.0 -httpx==0.24.0 +httpx==0.24.1 humanfriendly==10.0 idna==3.4 ijson==3.2.0.post0 -importlib-metadata==6.3.0 +importlib-metadata==6.6.0 +importlib-resources==5.12.0 inflection==0.5.1 ipaddress==1.0.23 ipykernel==6.17.1 -ipython==8.12.0 +ipython==8.13.2 ipython-genutils==0.2.0 ipywidgets==8.0.6 iso3166==2.1.1 @@ -160,13 +170,16 @@ jupyterlab-widgets==3.0.7 langcodes==3.3.0 lark==1.1.4 lazy-object-proxy==1.9.0 +leb128==1.0.5 +limits==3.5.0 linear-tsv==1.1.0 -linkify-it-py==2.0.0 +linkify-it-py==2.0.2 lkml==1.3.1 locket==1.0.0 lockfile==0.12.2 looker-sdk==23.0.0 lxml==4.9.2 +lz4==4.3.2 makefun==1.15.1 Mako==1.2.4 Markdown==3.4.3 @@ -181,28 +194,29 @@ mdit-py-plugins==0.3.5 mdurl==0.1.2 mistune==2.0.5 mixpanel==4.10.0 -mmh3==3.1.0 +mmh3==4.0.0 more-itertools==9.1.0 moreorless==0.4.0 -moto==4.1.7 +moto==4.1.10 msal==1.16.0 msal-extensions==1.0.0 multidict==6.0.4 murmurhash==1.0.9 -mypy==1.2.0 +mypy==1.3.0 mypy-extensions==1.0.0 -nbclassic==0.5.5 +nbclassic==1.0.0 nbclient==0.6.3 -nbconvert==7.3.1 +nbconvert==7.4.0 nbformat==5.8.0 nest-asyncio==1.5.6 networkx==3.1 notebook==6.5.4 -notebook_shim==0.2.2 -numpy==1.24.2 +notebook_shim==0.2.3 +numpy==1.24.3 oauthlib==3.2.2 okta==1.7.0 openpyxl==3.1.2 +ordered-set==4.1.0 oscrypto==1.3.0 packaging==23.1 pandas==1.5.3 @@ -217,42 +231,40 @@ pendulum==2.1.2 pexpect==4.8.0 phonenumbers==8.13.0 pickleshare==0.7.5 -platformdirs==3.2.0 +platformdirs==3.5.1 pluggy==1.0.0 portalocker==2.7.0 preshed==3.0.8 prison==0.2.1 progressbar2==4.2.0 -prometheus-client==0.16.0 +prometheus-client==0.17.0 prompt-toolkit==3.0.38 proto-plus==1.22.2 -protobuf==4.22.3 -psutil==5.9.4 +protobuf==4.23.2 +psutil==5.9.5 psycopg2-binary==2.9.6 ptyprocess==0.7.0 pure-eval==0.2.2 pure-sasl==0.6.2 -py-partiql-parser==0.1.0 +py-partiql-parser==0.3.0 pyarrow==8.0.0 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 pyathena==2.4.1 pycountry==22.3.5 pycparser==2.21 -pycryptodome==3.17 -pycryptodomex==3.17 -pydantic==1.10.7 -pydash==7.0.1 -pydeequ==1.0.1 +pycryptodome==3.18.0 +pycryptodomex==3.18.0 +pydantic==1.10.8 +pydash==7.0.3 pydruid==0.6.5 -Pygments==2.15.0 +Pygments==2.15.1 pymongo==4.3.3 PyMySQL==1.0.3 pyOpenSSL==22.0.0 pyparsing==3.0.9 pyrsistent==0.19.3 -pyspark==3.0.3 -pyspnego==0.8.0 +pyspnego==0.9.0 python-daemon==3.0.1 python-dateutil==2.8.2 python-dotenv==1.0.0 @@ -262,43 +274,44 @@ python-nvd3==0.15.0 python-slugify==8.0.1 python-stdnum==1.18 python-tds==1.12.0 -python-utils==3.5.2 +python-utils==3.6.0 python3-openid==3.2.0 pytz==2023.3 -pytz-deprecation-shim==0.1.0.post0 pytzdata==2020.1 PyYAML==6.0 -pyzmq==25.0.2 +pyzmq==25.1.0 ratelimiter==1.2.0.post0 redash-toolbelt==0.1.9 redshift-connector==2.0.910 -regex==2023.3.23 +regex==2023.5.5 requests==2.28.2 requests-file==1.5.1 +requests-gssapi==1.2.3 requests-ntlm==1.2.0 requests-toolbelt==0.10.1 responses==0.23.1 retrying==1.3.4 rfc3339-validator==0.1.4 rfc3986==2.0.0 -rich==13.3.4 +rich==13.3.5 +rich_argparse==1.1.0 rsa==4.9 ruamel.yaml==0.17.17 -s3transfer==0.6.0 +s3transfer==0.6.1 sasl3==0.2.11 schwifty==2023.3.0 scipy==1.10.1 scramp==1.4.4 -Send2Trash==1.8.0 +Send2Trash==1.8.2 setproctitle==1.3.2 -simple-salesforce==1.12.3 +simple-salesforce==1.12.4 six==1.16.0 smart-open==6.3.0 smmap==5.0.0 sniffio==1.3.0 snowflake-connector-python==2.9.0 snowflake-sqlalchemy==1.4.7 -soupsieve==2.4 +soupsieve==2.4.1 spacy==3.4.3 spacy-legacy==3.0.12 spacy-loggers==1.0.4 @@ -308,46 +321,45 @@ sqlalchemy-bigquery==1.6.1 SQLAlchemy-JSONField==1.0.1.post0 sqlalchemy-pytds==0.3.5 sqlalchemy-redshift==0.8.14 -SQLAlchemy-Utils==0.41.0 -sqlalchemy2-stubs==0.0.2a33 +SQLAlchemy-Utils==0.41.1 +sqlalchemy2-stubs==0.0.2a34 sqllineage==1.3.6 sqlparse==0.4.3 srsly==2.4.6 stack-data==0.6.2 -starlette==0.26.1 +starlette==0.27.0 tableauserverclient==0.25 tableschema==1.20.2 tabulate==0.9.0 tabulator==1.53.5 tenacity==8.2.2 -termcolor==2.2.0 +termcolor==2.3.0 terminado==0.17.1 text-unidecode==1.3 -thinc==8.1.9 +thinc==8.1.10 thrift==0.16.0 thrift-sasl==0.4.3 tinycss2==1.2.1 toml==0.10.2 tomli==2.0.1 toolz==0.12.0 -tornado==6.2 +tornado==6.3.2 tqdm==4.65.0 traitlets==5.2.1.post0 -trino==0.322.0 +trino==0.324.0 typeguard==2.13.3 typer==0.7.0 -types-PyYAML==6.0.12.9 -typing-inspect==0.8.0 +types-PyYAML==6.0.12.10 +typing-inspect==0.9.0 typing_extensions==4.5.0 -tzdata==2023.3 -tzlocal==4.3 -uc-micro-py==1.0.1 +tzlocal==5.0.1 +uc-micro-py==1.0.2 ujson==5.7.0 unicodecsv==0.14.1 -urllib3==1.26.15 -uvicorn==0.21.1 +urllib3==1.26.16 +uvicorn==0.22.0 uvloop==0.17.0 -vertica-python==1.3.1 +vertica-python==1.3.2 vertica-sqlalchemy-dialect==0.0.1 vininfo==1.7.0 volatile==2.1.0 @@ -356,13 +368,15 @@ watchfiles==0.19.0 wcmatch==8.4.1 wcwidth==0.2.6 webencodings==0.5.1 -websocket-client==1.5.1 -websockets==11.0.1 +websocket-client==1.5.2 +websockets==11.0.3 Werkzeug==2.2.3 widgetsnbextension==4.0.7 wrapt==1.15.0 WTForms==3.0.1 xlrd==2.0.1 xmltodict==0.13.0 -yarl==1.8.2 +yarl==1.9.2 zeep==4.2.1 +zipp==3.15.0 +zstd==1.5.5.1 diff --git a/docker/datahub-ingestion-base/build.gradle b/docker/datahub-ingestion-base/build.gradle index fe3c12a59886f..10cd2ee71cce3 100644 --- a/docker/datahub-ingestion-base/build.gradle +++ b/docker/datahub-ingestion-base/build.gradle @@ -12,14 +12,17 @@ ext { } docker { - name "${docker_registry}/${docker_repo}:v${version}" - version "v${version}" + name "${docker_registry}/${docker_repo}:v${version}-slim" + version "v${version}-slim" dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } + buildArgs([APP_ENV: 'slim']) } -tasks.getByPath('docker').dependsOn('build') +tasks.getByName('docker').dependsOn('build') task mkdirBuildDocker { doFirst { @@ -27,10 +30,11 @@ task mkdirBuildDocker { } } dockerClean.finalizedBy(mkdirBuildDocker) +dockerClean.dependsOn([':docker:datahub-ingestion:dockerClean']) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/docker/datahub-ingestion-base/entrypoint.sh b/docker/datahub-ingestion-base/entrypoint.sh new file mode 100644 index 0000000000000..518bb21561467 --- /dev/null +++ b/docker/datahub-ingestion-base/entrypoint.sh @@ -0,0 +1,14 @@ +#!/usr/bin/bash + +if [ ! -z "$ACTIONS_EXTRA_PACKAGES" ]; then + pip install --user $ACTIONS_EXTRA_PACKAGES +fi + +if [[ ! -z "$ACTIONS_CONFIG" && ! -z "$ACTIONS_EXTRA_PACKAGES" ]]; then + mkdir -p /tmp/datahub/logs + curl -q "$ACTIONS_CONFIG" -o config.yaml + exec dockerize -wait ${DATAHUB_GMS_PROTOCOL:-http}://$DATAHUB_GMS_HOST:$DATAHUB_GMS_PORT/health -timeout 240s \ + datahub actions --config config.yaml +else + exec datahub $@ +fi diff --git a/docker/datahub-ingestion-slim/Dockerfile b/docker/datahub-ingestion-slim/Dockerfile deleted file mode 100644 index 580dcc4277124..0000000000000 --- a/docker/datahub-ingestion-slim/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -# Defining environment -ARG APP_ENV=prod -ARG DOCKER_VERSION=latest - -FROM acryldata/datahub-ingestion:$DOCKER_VERSION as base - -USER 0 -RUN pip uninstall -y pyspark -USER datahub diff --git a/docker/datahub-ingestion-slim/build.gradle b/docker/datahub-ingestion-slim/build.gradle deleted file mode 100644 index f21b66b576a0c..0000000000000 --- a/docker/datahub-ingestion-slim/build.gradle +++ /dev/null @@ -1,39 +0,0 @@ -plugins { - id 'com.palantir.docker' - id 'java' // required for versioning -} - -apply from: "../../gradle/versioning/versioning.gradle" - -ext { - docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry - docker_repo = 'datahub-ingestion-slim' - docker_dir = 'datahub-ingestion-slim' -} - -docker { - name "${docker_registry}/${docker_repo}:v${version}" - version "v${version}" - dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") - files fileTree(rootProject.projectDir) { - include "docker/${docker_dir}/*" - } - buildArgs([DOCKER_VERSION: version]) - - buildx(false) -} -tasks.getByPath('docker').dependsOn(['build', ':docker:datahub-ingestion:docker']) - -task mkdirBuildDocker { - doFirst { - mkdir "${project.buildDir}/docker" - } -} -dockerClean.finalizedBy(mkdirBuildDocker) - -task cleanLocalDockerImages { - doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) - } -} -dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index 45a98efb7f6fb..d16caea2fcecd 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -1,42 +1,27 @@ # Defining environment -ARG APP_ENV=prod +ARG APP_ENV=full +ARG BASE_IMAGE=acryldata/datahub-ingestion-base ARG DOCKER_VERSION=latest -FROM acryldata/datahub-ingestion-base:$DOCKER_VERSION as base - -FROM eclipse-temurin:11 as prod-build -COPY . /datahub-src -WORKDIR /datahub-src -# We noticed that the gradle wrapper download failed frequently on in CI on arm64 machines. -# I suspect this was due because of the QEMU emulation slowdown, combined with the arm64 -# build being starved for CPU by the x86_64 build's codegen step. -# -# The middle step will attempt to download gradle wrapper 5 times with exponential backoff. -# The ./gradlew --version will force the download of the gradle wrapper but is otherwise a no-op. -# Note that the retry logic will always return success, so we should always attempt to run codegen. -# Inspired by https://github.com/gradle/gradle/issues/18124#issuecomment-958182335. -# and https://unix.stackexchange.com/a/82610/378179. -# This is a workaround for https://github.com/gradle/gradle/issues/18124. -RUN (for attempt in 1 2 3 4 5; do ./gradlew --version && break ; echo "Failed to download gradle wrapper (attempt $attempt)" && sleep $((2<<$attempt)) ; done ) && \ - ./gradlew :metadata-events:mxe-schemas:build - -FROM base as prod-codegen -COPY --from=prod-build /datahub-src /datahub-src -RUN cd /datahub-src/metadata-ingestion && \ - pip install -e ".[base]" && \ - ./scripts/codegen.sh - -FROM base as prod-install -COPY --from=prod-codegen /datahub-src/metadata-ingestion /datahub-ingestion -COPY --from=prod-codegen /root/.cache/pip /root/.cache/pip +FROM $BASE_IMAGE:$DOCKER_VERSION as base +USER 0 + +COPY ./metadata-ingestion /datahub-ingestion + ARG RELEASE_VERSION -RUN cd /datahub-ingestion && \ - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ +WORKDIR /datahub-ingestion +RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ cat src/datahub/__init__.py && \ - pip install ".[all]" && \ - pip freeze && \ - # This is required to fix security vulnerability in htrace-core4 - rm -f /usr/local/lib/python3.10/site-packages/pyspark/jars/htrace-core4-4.1.0-incubating.jar + chown -R datahub /datahub-ingestion + +USER datahub +ENV PATH="/datahub-ingestion/.local/bin:$PATH" + +FROM base as slim-install +RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" + +FROM base as full-install +RUN pip install --no-cache --user ".[all]" FROM base as dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. @@ -44,7 +29,5 @@ FROM base as dev-install FROM ${APP_ENV}-install as final -RUN addgroup --system datahub && adduser --system datahub --ingroup datahub USER datahub - -ENTRYPOINT [ "datahub" ] +ENV PATH="/datahub-ingestion/.local/bin:$PATH" \ No newline at end of file diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle index 7a24d87794c0e..22531c0c4fd0e 100644 --- a/docker/datahub-ingestion/build.gradle +++ b/docker/datahub-ingestion/build.gradle @@ -11,24 +11,30 @@ ext { docker_dir = 'datahub-ingestion' } +dependencies { + project(':docker:datahub-ingestion-base') + project(':metadata-ingestion') +} + docker { - name "${docker_registry}/${docker_repo}:v${version}" - version "v${version}" + name "${docker_registry}/${docker_repo}:v${version}-slim" + version "v${version}-slim" dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" include "metadata-ingestion/**" - include "metadata-events/**" - include "metadata-models/**" - include "li-utils/**" - include "docs/**" - include "gradle/**" - include "buildSrc/**" - include "*" + }.exclude { + i -> i.file.isHidden() || + i.file == buildDir || + i.file == project(':metadata-ingestion').buildDir } - buildArgs([DOCKER_VERSION: version]) + buildArgs([DOCKER_VERSION: version, + RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace('-slim', ''), + APP_ENV: 'slim']) } -tasks.getByPath('docker').dependsOn(['build', ':docker:datahub-ingestion-base:docker']) +tasks.getByName('docker').dependsOn(['build', + ':docker:datahub-ingestion-base:docker', + ':metadata-ingestion:codegen']) task mkdirBuildDocker { doFirst { @@ -39,7 +45,7 @@ dockerClean.finalizedBy(mkdirBuildDocker) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml index b6624f00c10fd..4b9d7a6c0e7a4 100644 --- a/docker/docker-compose-with-cassandra.yml +++ b/docker/docker-compose-with-cassandra.yml @@ -24,8 +24,11 @@ services: datahub-actions: container_name: datahub-actions hostname: actions - image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} + image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} env_file: datahub-actions/env/docker.env + environment: + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} depends_on: datahub-gms: condition: service_healthy diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index ce5fafbae403e..2b189ba46f00d 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -25,8 +25,11 @@ services: datahub-actions: container_name: datahub-actions hostname: actions - image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} + image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} env_file: datahub-actions/env/docker.env + environment: + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} depends_on: datahub-gms: condition: service_healthy diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index aefd34093af52..c0b301a6081df 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -24,8 +24,11 @@ services: datahub-actions: container_name: datahub-actions hostname: actions - image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} + image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} env_file: datahub-actions/env/docker.env + environment: + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} depends_on: datahub-gms: condition: service_healthy diff --git a/docker/elasticsearch-setup/build.gradle b/docker/elasticsearch-setup/build.gradle index cc2fe1ec5c4db..ffee3b9c65cf4 100644 --- a/docker/elasticsearch-setup/build.gradle +++ b/docker/elasticsearch-setup/build.gradle @@ -17,6 +17,8 @@ docker { files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" include "metadata-service/restli-servlet-impl/src/main/resources/index/**" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -25,7 +27,7 @@ docker { load(true) push(false) } -tasks.getByPath('docker').dependsOn('build') +tasks.getByName('docker').dependsOn('build') task mkdirBuildDocker { doFirst { @@ -36,7 +38,7 @@ dockerClean.finalizedBy(mkdirBuildDocker) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/docker/ingest-api/Dockerfile b/docker/ingest-api/Dockerfile index 45e8cede085c7..82e6235df26a7 100644 --- a/docker/ingest-api/Dockerfile +++ b/docker/ingest-api/Dockerfile @@ -3,7 +3,7 @@ RUN apt-get update && apt-get install -y jq RUN apt update && apt install -y build-essential python3-dev # FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8-slim as base -FROM openjdk:8 as prod-build +FROM openjdk:11 as prod-build COPY . /datahub-src RUN cd /datahub-src && ./gradlew :metadata-events:mxe-schemas:build diff --git a/docker/kafka-setup/build.gradle b/docker/kafka-setup/build.gradle index a5d33457e45f7..573ef21c88bf9 100644 --- a/docker/kafka-setup/build.gradle +++ b/docker/kafka-setup/build.gradle @@ -16,6 +16,8 @@ docker { dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -24,7 +26,7 @@ docker { load(true) push(false) } -tasks.getByPath('docker').dependsOn('build') +tasks.getByName('docker').dependsOn('build') task mkdirBuildDocker { doFirst { @@ -35,7 +37,7 @@ dockerClean.finalizedBy(mkdirBuildDocker) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/docker/mysql-setup/build.gradle b/docker/mysql-setup/build.gradle index 48a28f15a581d..0d8941cce4833 100644 --- a/docker/mysql-setup/build.gradle +++ b/docker/mysql-setup/build.gradle @@ -17,6 +17,8 @@ docker { dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -25,7 +27,7 @@ docker { load(true) push(false) } -tasks.getByPath('docker').dependsOn('build') +tasks.getByName('docker').dependsOn('build') task mkdirBuildDocker { doFirst { @@ -36,7 +38,7 @@ dockerClean.finalizedBy(mkdirBuildDocker) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}") + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/docker/postgres-setup/build.gradle b/docker/postgres-setup/build.gradle index a5b0413ec4be8..8a026be09d2b4 100644 --- a/docker/postgres-setup/build.gradle +++ b/docker/postgres-setup/build.gradle @@ -17,6 +17,8 @@ docker { dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -25,7 +27,7 @@ docker { load(true) push(false) } -tasks.getByPath('docker').dependsOn('build') +tasks.getByName('docker').dependsOn('build') task mkdirBuildDocker { doFirst { @@ -36,7 +38,7 @@ dockerClean.finalizedBy(mkdirBuildDocker) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}") + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index 35ff0ac4970e4..fa6b0d76a3ac2 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -34,6 +34,8 @@ services: datahub-gms: condition: service_healthy environment: + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} - DATAHUB_GMS_HOST=datahub-gms - DATAHUB_GMS_PORT=8080 - DATAHUB_GMS_PROTOCOL=http @@ -45,7 +47,7 @@ services: - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 - SCHEMA_REGISTRY_URL=http://schema-registry:8081 hostname: actions - image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} + image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} datahub-frontend-react: container_name: datahub-frontend-react depends_on: diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index 097cd0304a9ef..aa04b5eb82662 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -34,6 +34,8 @@ services: datahub-gms: condition: service_healthy environment: + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} - DATAHUB_GMS_HOST=datahub-gms - DATAHUB_GMS_PORT=8080 - DATAHUB_GMS_PROTOCOL=http @@ -45,7 +47,7 @@ services: - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 - SCHEMA_REGISTRY_URL=http://schema-registry:8081 hostname: actions - image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} + image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} datahub-frontend-react: container_name: datahub-frontend-react depends_on: diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 5136a698deae0..02c6ecfa39d0e 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -34,6 +34,8 @@ services: datahub-gms: condition: service_healthy environment: + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} - DATAHUB_GMS_HOST=datahub-gms - DATAHUB_GMS_PORT=8080 - DATAHUB_GMS_PROTOCOL=http @@ -45,7 +47,7 @@ services: - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 - SCHEMA_REGISTRY_URL=http://schema-registry:8081 hostname: actions - image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} + image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} datahub-frontend-react: container_name: datahub-frontend-react depends_on: diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index f7f728e8d68e2..41384df172757 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -34,6 +34,8 @@ services: datahub-gms: condition: service_healthy environment: + - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} + - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} - DATAHUB_GMS_HOST=datahub-gms - DATAHUB_GMS_PORT=8080 - DATAHUB_GMS_PROTOCOL=http @@ -45,7 +47,7 @@ services: - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 - SCHEMA_REGISTRY_URL=http://schema-registry:8081 hostname: actions - image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} + image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} datahub-frontend-react: container_name: datahub-frontend-react depends_on: diff --git a/gradle/docker/docker.gradle b/gradle/docker/docker.gradle index f0bb4a5500b33..db2979a8ff6dc 100644 --- a/gradle/docker/docker.gradle +++ b/gradle/docker/docker.gradle @@ -21,6 +21,7 @@ ext.getDockerContainers = { ext.cleanLocalDockerImages = { String docker_registry, String docker_repo, String docker_tag -> + println("Docker image string: ${docker_registry}/${docker_repo}:${docker_tag}") def containers = getDockerContainers(docker_registry, docker_repo, docker_tag) if(!containers.isEmpty()) { println "Stopping containers: $containers" @@ -35,6 +36,7 @@ ext.cleanLocalDockerImages = { if(!images.isEmpty()) { println "Removing images: $images" exec { + ignoreExitValue true // may not work if used by downstream image commandLine = ["docker", "rmi", "-f"] + images } } diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index 44dfab36951e5..3bb1c13918c49 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -871,6 +871,7 @@ def download_compose_files( tmp_file.write(quickstart_download_response.content) logger.debug(f"Copied to {path}") if kafka_setup: + base_url = get_docker_compose_base_url(compose_git_ref) kafka_setup_github_file = f"{base_url}/{KAFKA_SETUP_QUICKSTART_COMPOSE_FILE}" default_kafka_compose_file = ( diff --git a/metadata-jobs/mae-consumer-job/build.gradle b/metadata-jobs/mae-consumer-job/build.gradle index e7941a04224e3..3811a9537ac24 100644 --- a/metadata-jobs/mae-consumer-job/build.gradle +++ b/metadata-jobs/mae-consumer-job/build.gradle @@ -43,6 +43,8 @@ docker { include 'docker/monitoring/*' include "docker/${docker_repo}/*" include 'metadata-models/src/main/resources/*' + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -55,7 +57,7 @@ tasks.getByName("docker").dependsOn([bootJar]) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/metadata-jobs/mce-consumer-job/build.gradle b/metadata-jobs/mce-consumer-job/build.gradle index 5981284e9da3f..2229c387f3676 100644 --- a/metadata-jobs/mce-consumer-job/build.gradle +++ b/metadata-jobs/mce-consumer-job/build.gradle @@ -56,6 +56,8 @@ docker { include 'docker/monitoring/*' include "docker/${docker_repo}/*" include 'metadata-models/src/main/resources/*' + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -68,7 +70,7 @@ tasks.getByName("docker").dependsOn([bootJar]) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}".toString()) + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/metadata-service/war/build.gradle b/metadata-service/war/build.gradle index ebe88ebf99849..f71ec4cd46552 100644 --- a/metadata-service/war/build.gradle +++ b/metadata-service/war/build.gradle @@ -68,6 +68,8 @@ docker { include 'docker/monitoring/*' include "docker/${docker_repo}/*" include 'metadata-models/src/main/resources/*' + }.exclude { + i -> i.file.isHidden() || i.file == buildDir } tag("Debug", "${docker_registry}/${docker_repo}:debug") @@ -80,7 +82,7 @@ tasks.getByName("docker").dependsOn([build, war]) task cleanLocalDockerImages { doLast { - rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "v${version}") + rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js index 69c8968fd25c3..24a24cc21138d 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js @@ -7,7 +7,7 @@ describe("run managed ingestion", () => { it("create run managed ingestion source", () => { let number = Math.floor(Math.random() * 100000); let testName = `cypress test source ${number}` - let cli_version = "0.9.3.3rc5"; + let cli_version = "0.10.5.4"; cy.login(); cy.goToIngestionPage(); cy.clickOptionWithText("Create new source"); @@ -31,10 +31,10 @@ describe("run managed ingestion", () => { cy.waitTextVisible(testName) cy.contains(testName).parent().within(() => { - cy.contains("Succeeded", {timeout: 30000}) + cy.contains("Succeeded", {timeout: 180000}) cy.clickOptionWithTestId("delete-button"); }) cy.clickOptionWithText("Yes") cy.ensureTextNotPresent(testName) }) -}); \ No newline at end of file +});