Skip to content

Commit

Permalink
feat: separate great-expectations action package (#11096)
Browse files Browse the repository at this point in the history
Co-authored-by: Harshal Sheth <[email protected]>
  • Loading branch information
mayurinehate and hsheth2 authored Aug 21, 2024
1 parent 6b3c06a commit 9568a42
Show file tree
Hide file tree
Showing 39 changed files with 1,695 additions and 994 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ jobs:
-x :metadata-ingestion-modules:airflow-plugin:check \
-x :metadata-ingestion-modules:dagster-plugin:build \
-x :metadata-ingestion-modules:dagster-plugin:check \
-x :metadata-ingestion-modules:gx-plugin:build \
-x :metadata-ingestion-modules:gx-plugin:check \
-x :datahub-frontend:build \
-x :datahub-web-react:build \
--parallel
Expand Down
87 changes: 87 additions & 0 deletions .github/workflows/gx-plugin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
name: GX Plugin
on:
push:
branches:
- master
paths:
- ".github/workflows/gx-plugin.yml"
- "metadata-ingestion-modules/gx-plugin/**"
- "metadata-ingestion/**"
- "metadata-models/**"
pull_request:
branches:
- master
paths:
- ".github/**"
- "metadata-ingestion-modules/gx-plugin/**"
- "metadata-ingestion/**"
- "metadata-models/**"
release:
types: [published]

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
gx-plugin:
runs-on: ubuntu-latest
env:
SPARK_VERSION: 3.0.3
DATAHUB_TELEMETRY_ENABLED: false
strategy:
matrix:
python-version: ["3.8", "3.10"]
include:
- python-version: "3.8"
extraPythonRequirement: "great-expectations~=0.15.12"
- python-version: "3.10"
extraPythonRequirement: "great-expectations~=0.16.0 numpy~=1.26.0"
- python-version: "3.11"
extraPythonRequirement: "great-expectations~=0.17.0"
fail-fast: false
steps:
- name: Set up JDK 17
uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: 17
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: "pip"
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Install GX package and test (extras ${{ matrix.extraPythonRequirement }})
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:gx-plugin:lint :metadata-ingestion-modules:gx-plugin:testQuick
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'great-expectations~=0.17.0' }}
with:
name: Test Results (GX Plugin ${{ matrix.python-version}})
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
**/junit.*.xml
- name: Upload coverage to Codecov
if: always()
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
directory: .
fail_ci_if_error: false
flags: gx-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }}
name: pytest-gx
verbose: true

event-file:
runs-on: ubuntu-latest
steps:
- name: Upload
uses: actions/upload-artifact@v3
with:
name: Event File
path: ${{ github.event_path }}
2 changes: 1 addition & 1 deletion .github/workflows/test-results.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Test Results

on:
workflow_run:
workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Dagster Plugin"]
workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Dagster Plugin", "GX Plugin"]
types:
- completed

Expand Down
1 change: 1 addition & 0 deletions docs-website/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall,
':metadata-ingestion:buildWheel',
':metadata-ingestion-modules:airflow-plugin:buildWheel',
':metadata-ingestion-modules:dagster-plugin:buildWheel',
':metadata-ingestion-modules:gx-plugin:buildWheel',
]) {
inputs.files(projectMdFiles)
outputs.cacheIf { true }
Expand Down
1 change: 1 addition & 0 deletions docs-website/generateDocsDir.ts
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,7 @@ function copy_python_wheels(): void {
"../metadata-ingestion/dist",
"../metadata-ingestion-modules/airflow-plugin/dist",
"../metadata-ingestion-modules/dagster-plugin/dist",
"../metadata-ingestion-modules/gx-plugin/dist",
];

const wheel_output_directory = path.join(STATIC_DIRECTORY, "wheels");
Expand Down
1 change: 1 addition & 0 deletions docs-website/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,7 @@ module.exports = {
// "metadata-integration/java/openlineage-converter/README"
//"metadata-ingestion-modules/airflow-plugin/README"
//"metadata-ingestion-modules/dagster-plugin/README"
//"metadata-ingestion-modules/gx-plugin/README"
// "metadata-ingestion/schedule_docs/datahub", // we can delete this
// TODO: change the titles of these, removing the "What is..." portion from the sidebar"
// "docs/what/entity",
Expand Down
4 changes: 3 additions & 1 deletion metadata-ingestion-modules/airflow-plugin/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ def get_long_description():

_version: str = package_metadata["__version__"]
_self_pin = (
f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else ""
f"=={_version}"
if not (_version.endswith(("dev0", "dev1")) or "docker" in _version)
else ""
)


Expand Down
14 changes: 3 additions & 11 deletions metadata-ingestion-modules/dagster-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"uv pip install -e . ${extra_pip_requirements} && " +
"${pip_install_command} -e . ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
}

Expand All @@ -45,15 +45,11 @@ task installDev(type: Exec, dependsOn: [install]) {
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"uv pip install -e .[dev] ${extra_pip_requirements} && " +
"${pip_install_command} -e .[dev] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
}

task lint(type: Exec, dependsOn: installDev) {
/*
The find/sed combo below is a temporary work-around for the following mypy issue with airflow 2.2.0:
"venv/lib/python3.8/site-packages/airflow/_vendor/connexion/spec.py:169: error: invalid syntax".
*/
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"black --check --diff src/ tests/ examples/ && " +
Expand All @@ -77,7 +73,7 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"uv pip install -e .[dev,integration-tests] ${extra_pip_requirements} && " +
"${pip_install_command} -e .[dev,integration-tests] ${extra_pip_requirements} && " +
"touch ${sentinel_file}"
}

Expand Down Expand Up @@ -105,10 +101,6 @@ task testQuick(type: Exec, dependsOn: installDevTest) {
}


task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml"
}
task buildWheel(type: Exec, dependsOn: [environmentSetup]) {
commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " +
'uv pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_INSTALL=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh'
Expand Down
8 changes: 4 additions & 4 deletions metadata-ingestion-modules/dagster-plugin/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@ def get_long_description():

_version: str = package_metadata["__version__"]
_self_pin = (
f"=={_version}" if not (_version.endswith("dev0") or "docker" in _version) else ""
f"=={_version}"
if not (_version.endswith(("dev0", "dev1")) or "docker" in _version)
else ""
)

base_requirements = {
# Actual dependencies.
"dagster >= 1.3.3",
"dagit >= 1.3.3",
*rest_common,
# Ignoring the dependency below because it causes issues with the vercel built wheel install
# f"acryl-datahub[datahub-rest]{_self_pin}",
"acryl-datahub[datahub-rest]",
f"acryl-datahub[datahub-rest]{_self_pin}",
}

mypy_stubs = {
Expand Down
143 changes: 143 additions & 0 deletions metadata-ingestion-modules/gx-plugin/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
.envrc
src/datahub_gx_plugin/__init__.py.bak
.vscode/
output
pvenv36/
bq_credentials.json
/tmp
*.bak

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Generated classes
src/datahub/metadata/
wheels/
junit.quick.xml
4 changes: 4 additions & 0 deletions metadata-ingestion-modules/gx-plugin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Datahub GX Plugin

See the DataHub GX docs for details.

Loading

0 comments on commit 9568a42

Please sign in to comment.