Skip to content

Commit

Permalink
Merge branch 'master' into s3_add_partition_to_schema
Browse files Browse the repository at this point in the history
  • Loading branch information
treff7es authored Sep 26, 2023
2 parents 98ac100 + 0a869dd commit 99d78b4
Show file tree
Hide file tree
Showing 917 changed files with 30,951 additions and 15,232 deletions.
85 changes: 85 additions & 0 deletions .github/workflows/airflow-plugin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
name: Airflow Plugin
on:
push:
branches:
- master
paths:
- ".github/workflows/airflow-plugin.yml"
- "metadata-ingestion-modules/airflow-plugin/**"
- "metadata-ingestion/**"
- "metadata-models/**"
pull_request:
branches:
- master
paths:
- ".github/**"
- "metadata-ingestion-modules/airflow-plugin/**"
- "metadata-ingestion/**"
- "metadata-models/**"
release:
types: [published]

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
airflow-plugin:
runs-on: ubuntu-latest
env:
SPARK_VERSION: 3.0.3
DATAHUB_TELEMETRY_ENABLED: false
strategy:
matrix:
include:
- python-version: "3.7"
extraPythonRequirement: "apache-airflow~=2.1.0"
- python-version: "3.7"
extraPythonRequirement: "apache-airflow~=2.2.0"
- python-version: "3.10"
extraPythonRequirement: "apache-airflow~=2.4.0"
- python-version: "3.10"
extraPythonRequirement: "apache-airflow~=2.6.0"
- python-version: "3.10"
extraPythonRequirement: "apache-airflow>2.6.0"
fail-fast: false
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: "pip"
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Install airflow package and test (extras ${{ matrix.extraPythonRequirement }})
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:airflow-plugin:lint :metadata-ingestion-modules:airflow-plugin:testQuick
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'apache-airflow>2.6.0' }}
with:
name: Test Results (Airflow Plugin ${{ matrix.python-version}})
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
**/junit.*.xml
- name: Upload coverage to Codecov
if: always()
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
directory: .
fail_ci_if_error: false
flags: airflow-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }}
name: pytest-airflow
verbose: true

event-file:
runs-on: ubuntu-latest
steps:
- name: Upload
uses: actions/upload-artifact@v3
with:
name: Event File
path: ${{ github.event_path }}
2 changes: 1 addition & 1 deletion .github/workflows/check-datahub-jars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
java-version: 11
- uses: actions/setup-python@v4
with:
python-version: "3.7"
python-version: "3.10"
- name: check ${{ matrix.command }} jar
run: |
./gradlew :metadata-integration:java:${{ matrix.command }}:build --info
Expand Down
2 changes: 0 additions & 2 deletions .github/workflows/docker-ingestion-smoke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ on:
release:
types: [published]
push:
branches:
- master
paths:
- "docker/datahub-ingestion-base/**"
- "smoke-test/**"
Expand Down
28 changes: 16 additions & 12 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ jobs:
unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
publish: ${{ steps.publish.outputs.publish }}
python_release_version: ${{ steps.tag.outputs.python_release_version }}
steps:
- name: Checkout
uses: actions/checkout@v3
Expand All @@ -57,7 +58,8 @@ jobs:
echo "full_tag=$(get_tag)-full" >> $GITHUB_OUTPUT
echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
echo "unique_slim_tag=$(get_unique_tag)-slim" >> $GITHUB_OUTPUT
echo "unique_full_tag=$(get_unique_tag)-full" >> $GITHUB_OUTPUT
echo "unique_full_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
echo "python_release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT
- name: Check whether publishing enabled
id: publish
env:
Expand Down Expand Up @@ -499,7 +501,7 @@ jobs:
platforms: linux/amd64,linux/arm64/v8
- name: Compute DataHub Ingestion (Base-Slim) Tag
id: tag
run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT
datahub_ingestion_base_full_build:
name: Build and Push DataHub Ingestion (Base-Full) Docker Image
runs-on: ubuntu-latest
Expand Down Expand Up @@ -565,23 +567,24 @@ jobs:
datahub-ingestion:
- 'docker/datahub-ingestion/**'
- name: Build codegen
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }}
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }}
run: ./gradlew :metadata-ingestion:codegen
- name: Download Base Image
uses: ishworkh/docker-image-artifact-download@v1
if: ${{ needs.setup.outputs.publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }}
with:
image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }}
image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}
- name: Build and push Slim Image
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }}
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }}
uses: ./.github/actions/docker-custom-build-and-push
with:
target: final
images: |
${{ env.DATAHUB_INGESTION_IMAGE }}
build-args: |
BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}
DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }}
DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}
RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}
APP_ENV=slim
tags: ${{ needs.setup.outputs.slim_tag }}
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
Expand All @@ -592,7 +595,7 @@ jobs:
platforms: linux/amd64,linux/arm64/v8
- name: Compute Tag
id: tag
run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_slim_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT
datahub_ingestion_slim_scan:
permissions:
contents: read # for actions/checkout to fetch code
Expand Down Expand Up @@ -647,15 +650,15 @@ jobs:
datahub-ingestion:
- 'docker/datahub-ingestion/**'
- name: Build codegen
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }}
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }}
run: ./gradlew :metadata-ingestion:codegen
- name: Download Base Image
uses: ishworkh/docker-image-artifact-download@v1
if: ${{ needs.setup.outputs.publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }}
with:
image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}
- name: Build and push Full Image
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }}
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }}
uses: ./.github/actions/docker-custom-build-and-push
with:
target: final
Expand All @@ -664,6 +667,7 @@ jobs:
build-args: |
BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}
DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}
RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}
tags: ${{ needs.setup.outputs.unique_full_tag }}
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
Expand All @@ -673,7 +677,7 @@ jobs:
platforms: linux/amd64,linux/arm64/v8
- name: Compute Tag (Full)
id: tag
run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT
datahub_ingestion_full_scan:
permissions:
contents: read # for actions/checkout to fetch code
Expand Down Expand Up @@ -739,7 +743,7 @@ jobs:
java-version: 11
- uses: actions/setup-python@v4
with:
python-version: "3.7"
python-version: "3.10"
cache: "pip"
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
Expand Down Expand Up @@ -805,7 +809,7 @@ jobs:
DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }}
DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }}
ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag }}
ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor] acryl-datahub-actions'
ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5'
ACTIONS_CONFIG: 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml'
run: |
./smoke-test/run-quickstart.sh
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
- uses: actions/setup-python@v4
with:
python-version: "3.10"
cache: pip
- name: Install Python dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Build Docs
Expand Down
7 changes: 2 additions & 5 deletions .github/workflows/metadata-ingestion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ jobs:
]
include:
- python-version: "3.7"
extraPythonRequirement: "sqlalchemy==1.3.24 apache-airflow~=2.2.0"
- python-version: "3.10"
extraPythonRequirement: "sqlalchemy~=1.4.0 apache-airflow>=2.4.0"
fail-fast: false
steps:
- uses: actions/checkout@v3
Expand All @@ -56,8 +54,8 @@ jobs:
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Install package
run: ./gradlew :metadata-ingestion:installPackageOnly
- name: Run metadata-ingestion tests (extras ${{ matrix.extraPythonRequirement }})
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion:${{ matrix.command }}
- name: Run metadata-ingestion tests
run: ./gradlew :metadata-ingestion:${{ matrix.command }}
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion/venv/bin/activate && pip freeze
Expand All @@ -80,7 +78,6 @@ jobs:
name: pytest-${{ matrix.command }}
verbose: true


event-file:
runs-on: ubuntu-latest
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/metadata-io.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
java-version: 11
- uses: actions/setup-python@v4
with:
python-version: "3.7"
python-version: "3.10"
- name: Gradle build (and test)
# there is some race condition in gradle build, which makes gradle never terminate in ~30% of the runs
# running build first without datahub-web-react:yarnBuild and then with it is 100% stable
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish-datahub-jars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
java-version: 11
- uses: actions/setup-python@v4
with:
python-version: "3.7"
python-version: "3.10"
- name: checkout upstream repo
run: |
git remote add upstream https://github.com/datahub-project/datahub.git
Expand Down
7 changes: 6 additions & 1 deletion .github/workflows/spark-smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,21 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 800
fetch-tags: true
- name: Set up JDK 11
uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: 11
- uses: actions/setup-python@v4
with:
python-version: "3.7"
python-version: "3.10"
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Remove images
run: docker image prune -a -f || true
- name: Smoke test
run: |
./gradlew :metadata-integration:java:spark-lineage:integrationTest \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-results.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Test Results

on:
workflow_run:
workflows: ["build & test", "metadata ingestion"]
workflows: ["build & test", "metadata ingestion", "Airflow Plugin"]
types:
- completed

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ export const Logo = (props) => {
<div style={{ display: "flex", justifyContent: "center", padding: "20px", height: "190px" }}>
<img
alt="DataHub Logo"
src={useBaseUrl("/img/datahub-logo-color-mark.svg")}
src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/datahub-logo-color-mark.svg"
{...props}
/>
</div>
Expand All @@ -18,7 +18,7 @@ export const Logo = (props) => {
<!--
HOSTED_DOCS_ONLY-->
<p align="center">
<img alt="DataHub" src="docs/imgs/datahub-logo-color-mark.svg" height="150" />
<img alt="DataHub" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/datahub-logo-color-mark.svg" height="150" />
</p>
<!-- -->

Expand Down Expand Up @@ -156,7 +156,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to

- [DataHub Blog](https://blog.datahubproject.io/)
- [DataHub YouTube Channel](https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w)
- [Optum: Data Mesh via DataHub](https://optum.github.io/blog/2022/03/23/data-mesh-via-datahub/)
- [Optum: Data Mesh via DataHub](https://opensource.optum.com/blog/2022/03/23/data-mesh-via-datahub)
- [Saxo Bank: Enabling Data Discovery in Data Mesh](https://medium.com/datahub-project/enabling-data-discovery-in-a-data-mesh-the-saxo-journey-451b06969c8f)
- [Bringing The Power Of The DataHub Real-Time Metadata Graph To Everyone At Acryl Data](https://www.dataengineeringpodcast.com/acryl-data-datahub-metadata-graph-episode-230/)
- [DataHub: Popular Metadata Architectures Explained](https://engineering.linkedin.com/blog/2020/datahub-popular-metadata-architectures-explained)
Expand Down
Loading

0 comments on commit 99d78b4

Please sign in to comment.