From 978bfbbfca5c2865ce6bd0473dae50ae320f4a32 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Thu, 13 Jul 2023 10:44:00 +0530 Subject: [PATCH 01/21] Prefect source integration code added --- .../prefect-datahub/.gitignore | 143 ++++ .../prefect-datahub/MAINTAINERS.md | 114 ++++ .../prefect-datahub/MANIFEST.in | 14 + .../prefect-datahub/README.md | 146 ++++ .../prefect-datahub/build.gradle | 104 +++ .../prefect-datahub/docs/concept_mapping.md | 12 + .../prefect-datahub/docs/datahub_emitter.md | 2 + .../docs/gen_blocks_catalog.py | 103 +++ .../docs/gen_examples_catalog.py | 120 ++++ .../prefect-datahub/docs/gen_home_page.py | 21 + .../prefect-datahub/docs/img/favicon.ico | Bin 0 -> 15406 bytes .../img/prefect-logo-mark-solid-white-500.png | Bin 0 -> 16294 bytes .../docs/img/prefect-logo-white.png | Bin 0 -> 2214 bytes .../integrations/analytics/custom.html | 16 + .../docs/stylesheets/extra.css | 114 ++++ .../prefect-datahub/mkdocs.yml | 81 +++ .../prefect_datahub/__init__.py | 21 + .../prefect_datahub/datahub_emitter.py | 637 ++++++++++++++++++ .../prefect-datahub/requirements-dev.txt | 16 + .../prefect-datahub/requirements.txt | 2 + .../prefect-datahub/scripts/release.sh | 26 + .../prefect-datahub/setup.cfg | 39 ++ .../prefect-datahub/setup.py | 48 ++ .../prefect-datahub/tests/conftest.py | 489 ++++++++++++++ .../tests/test_block_standards.py | 22 + .../tests/test_datahub_emitter.py | 291 ++++++++ 26 files changed, 2581 insertions(+) create mode 100644 metadata-ingestion-modules/prefect-datahub/.gitignore create mode 100644 metadata-ingestion-modules/prefect-datahub/MAINTAINERS.md create mode 100644 metadata-ingestion-modules/prefect-datahub/MANIFEST.in create mode 100644 metadata-ingestion-modules/prefect-datahub/README.md create mode 100644 metadata-ingestion-modules/prefect-datahub/build.gradle create mode 100644 metadata-ingestion-modules/prefect-datahub/docs/concept_mapping.md create mode 100644 
metadata-ingestion-modules/prefect-datahub/docs/datahub_emitter.md create mode 100644 metadata-ingestion-modules/prefect-datahub/docs/gen_blocks_catalog.py create mode 100644 metadata-ingestion-modules/prefect-datahub/docs/gen_examples_catalog.py create mode 100644 metadata-ingestion-modules/prefect-datahub/docs/gen_home_page.py create mode 100644 metadata-ingestion-modules/prefect-datahub/docs/img/favicon.ico create mode 100644 metadata-ingestion-modules/prefect-datahub/docs/img/prefect-logo-mark-solid-white-500.png create mode 100644 metadata-ingestion-modules/prefect-datahub/docs/img/prefect-logo-white.png create mode 100644 metadata-ingestion-modules/prefect-datahub/docs/overrides/partials/integrations/analytics/custom.html create mode 100644 metadata-ingestion-modules/prefect-datahub/docs/stylesheets/extra.css create mode 100644 metadata-ingestion-modules/prefect-datahub/mkdocs.yml create mode 100644 metadata-ingestion-modules/prefect-datahub/prefect_datahub/__init__.py create mode 100644 metadata-ingestion-modules/prefect-datahub/prefect_datahub/datahub_emitter.py create mode 100644 metadata-ingestion-modules/prefect-datahub/requirements-dev.txt create mode 100644 metadata-ingestion-modules/prefect-datahub/requirements.txt create mode 100755 metadata-ingestion-modules/prefect-datahub/scripts/release.sh create mode 100644 metadata-ingestion-modules/prefect-datahub/setup.cfg create mode 100644 metadata-ingestion-modules/prefect-datahub/setup.py create mode 100644 metadata-ingestion-modules/prefect-datahub/tests/conftest.py create mode 100644 metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py create mode 100644 metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py diff --git a/metadata-ingestion-modules/prefect-datahub/.gitignore b/metadata-ingestion-modules/prefect-datahub/.gitignore new file mode 100644 index 0000000000000..d0108e8361a06 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/.gitignore @@ -0,0 
+1,143 @@ +.envrc +src/datahub_airflow_plugin/__init__.py.bak +.vscode/ +output +pvenv36/ +bq_credentials.json +/tmp +*.bak + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Generated classes +src/datahub/metadata/ +wheels/ +junit.quick.xml diff --git a/metadata-ingestion-modules/prefect-datahub/MAINTAINERS.md b/metadata-ingestion-modules/prefect-datahub/MAINTAINERS.md new file mode 100644 index 0000000000000..b58c764f875c2 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/MAINTAINERS.md @@ -0,0 +1,114 @@ +# prefect-datahub + +## Getting Started + +Now that you've bootstrapped a project, follow the steps below to get started developing your Prefect Collection! + +### Python setup + +Requires an installation of Python 3.7+ + +We recommend using a Python virtual environment manager such as pipenv, conda or virtualenv. + +### GitHub setup + +Create a Git respoitory for the newly generated collection and create the first commit: + +```bash +git init +git add . 
+git commit -m "Initial commit: project generated by prefect-collection-template" +``` + +Then, create a new repo following the prompts at: +https://github.com/organizations/shubhamjagtap639/repositories/new + +Upon creation, push the repository to GitHub: +```bash +git remote add origin https://github.com/shubhamjagtap639/prefect-datahub.git +git branch -M main +git push -u origin main +``` + +It's recommended to setup some protection rules for main at: +https://github.com/shubhamjagtap639/prefect-datahub/settings/branches + +- Require a pull request before merging +- Require approvals + +Lastly, [code owners](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners) for the repository can be set, like this [example here](https://github.com/PrefectHQ/prefect/blob/master/.github/CODEOWNERS). + +### Project setup + +To setup your project run the following: + +```bash +# Create an editable install of your project +pip install -e ".[dev]" + +# Configure pre-commit hooks +pre-commit install +``` + +To verify the setup was successful you can run the following: + +- Run the tests for tasks and flows in the collection: + ```bash + pytest tests + ``` +- Serve the docs with `mkdocs`: + ```bash + mkdocs serve + ``` + +## Developing tasks and flows + +For information about the use and development of tasks and flow, check out the [flows](https://docs.prefect.io/concepts/flows/) and [tasks](https://docs.prefect.io/concepts/tasks/) concepts docs in the Prefect docs. + +## Writing documentation + +This collection has been setup to with [mkdocs](https://www.mkdocs.org/) for automatically generated documentation. The signatures and docstrings of your tasks and flow will be used to generate documentation for the users of this collection. You can make changes to the structure of the generated documentation by editing the `mkdocs.yml` file in this project. 
+ +To add a new page for a module in your collection, create a new markdown file in the `docs` directory and add that file to the `nav` section of `mkdocs.yml`. If you want to automatically generate documentation based on the docstrings and signatures of the contents of the module with `mkdocstrings`, add a line to the new markdown file in the following format: + +```markdown +::: prefect_datahub.{module_name} +``` + +You can also refer to the `flows.md` and `tasks.md` files included in your generated project as examples. + +Once you have working code, replace the default "Write and run a flow" example in `README.md` to match your collection. + +## Development lifecycle + +### CI Pipeline + +This collection comes with [GitHub Actions](https://docs.github.com/en/actions) for testing and linting. To add additional actions, you can add jobs in the `.github/workflows` folder. Upon a pull request, the pipeline will run linting via [`black`](https://black.readthedocs.io/en/stable/), [`flake8`](https://flake8.pycqa.org/en/latest/), [`interrogate`](https://interrogate.readthedocs.io/en/latest/), and unit tests via `pytest` alongside `coverage`. + +`interrogate` will tell you which methods, functions, classes, and modules have docstrings, and which do not--the job has a fail threshold of 95%, meaning that it will fail if more than 5% of the codebase is undocumented. We recommend following the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for docstring format. + +Simiarly, `coverage` ensures that the codebase includes tests--the job has a fail threshold of 80%, meaning that it will fail if more than 20% of the codebase is missing tests. + +### Track Issues on Project Board + +To automatically add issues to a GitHub Project Board, you'll need a [secret added](https://docs.github.com/en/actions/security-guides/encrypted-secrets#creating-encrypted-secrets-for-an-environment) to the repository. 
Specifically, a secret named `ADD_TO_PROJECT_URL`, formatted like `https://github.com/orgs//projects/`. + +### Package and Publish + +GitHub actions will handle packaging and publishing of your collection to [PyPI](https://pypi.org/) so other Prefect users can your collection in their flows. + +To publish to PyPI, you'll need a PyPI account and to generate an API token to authenticate with PyPI when publishing new versions of your collection. The [PyPI documentation](https://pypi.org/help/#apitoken) outlines the steps needed to get an API token. + +Once you've obtained a PyPI API token, [create a GitHub secret](https://docs.github.com/en/actions/security-guides/encrypted-secrets#creating-encrypted-secrets-for-a-repository) named `PYPI_API_TOKEN`. + +To publish a new version of your collection, [create a new GitHub release](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository#creating-a-release) and tag it with the version that you want to deploy (e.g. v0.3.2). This will trigger a workflow to publish the new version on PyPI and deploy the updated docs to GitHub pages. + +Upon publishing, a `docs` branch is automatically created. To hook this up to GitHub Pages, simply head over to https://github.com/shubhamjagtap639/prefect-datahub/settings/pages, select `docs` under the dropdown menu, keep the default `/root` folder, `Save`, and upon refresh, you should see a prompt stating "Your site is published at https://shubhamjagtap639.github.io/prefect-datahub". Don't forget to add this link to the repo's "About" section, under "Website" so users can access the docs easily. + +Feel free to [submit your collection](https://docs.prefect.io/collections/overview/#listing-in-the-collections-catalog) to the Prefect [Collections Catalog](https://docs.prefect.io/collections/catalog/)! 
+ +## Further guidance + +If you run into any issues during the bootstrapping process, feel free to open an issue in the [prefect-collection-template](https://github.com/PrefectHQ/prefect-collection-template) repository. + +If you have any questions or issues while developing your collection, you can find help in either the [Prefect Discourse forum](https://discourse.prefect.io/) or the [Prefect Slack community](https://prefect.io/slack). diff --git a/metadata-ingestion-modules/prefect-datahub/MANIFEST.in b/metadata-ingestion-modules/prefect-datahub/MANIFEST.in new file mode 100644 index 0000000000000..9e3fb02f8f704 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/MANIFEST.in @@ -0,0 +1,14 @@ +# Things to always exclude +global-exclude .git* +global-exclude .ipynb_checkpoints +global-exclude *.py[co] +global-exclude __pycache__/** + +# Top-level Config +include versioneer.py +include prefect_datahub/_version.py +include LICENSE +include MANIFEST.in +include setup.cfg +include requirements.txt +include requirements-dev.txt diff --git a/metadata-ingestion-modules/prefect-datahub/README.md b/metadata-ingestion-modules/prefect-datahub/README.md new file mode 100644 index 0000000000000..1aedba8c5ca90 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/README.md @@ -0,0 +1,146 @@ +# Emit flows & tasks metadata to DataHub rest with `prefect-datahub` + +

+ + PyPI + + + + + + +
+ + + + +

+ +## Welcome! + +The `prefect-datahub` collection makes it easy to leverage the capabilities of DataHub emitter in your flows, featuring support for ingesting metadata of flows, tasks & workspace to DataHub gms rest. + + +## Getting Started + +### Setup DataHub UI + +In order to use 'prefect-datahub' collection, you'll first need to deploy the new instance of DataHub. + +You can get the instructions on deploying the open source DataHub by navigating to the [apps page](https://datahubproject.io/docs/quickstart). + +Successful deployment of DataHub will lead creation of DataHub GMS service running on 'http://localhost:8080' if you have deployed it on local system. + +### Saving configurations to a block + + +This is a one-time activity, where you can save the configuration on the [Prefect block document store](https://docs.prefect.io/2.10.13/concepts/blocks/#saving-blocks). +While saving you can provide below configurations. Default value will get set if not provided while saving the configuration to block. + +Config | Type | Default | Description +--- | --- | --- | --- +datahub_rest_url | `str` | *http://localhost:8080* | DataHub GMS REST URL +env | `str` | *PROD* | The environment that all assets produced by this orchestrator belong to. For more detail and possible values refer [here](https://datahubproject.io/docs/graphql/enums/#fabrictype). +platform_instance | `str` | *None* | The instance of the platform that all assets produced by this recipe belong to. For more detail please refer [here](https://datahubproject.io/docs/platform-instances/). + +```python +from prefect_datahub.datahub_emitter import DatahubEmitter +DatahubEmitter( + datahub_rest_url="http://localhost:8080", + env="PROD", + platform_instance="local_prefect" +).save("BLOCK-NAME-PLACEHOLDER") +``` + +Congrats! 
You can now load the saved block to use your configurations in your Flow code: + +```python +from prefect_datahub.datahub_emitter import DatahubEmitter +DatahubEmitter.load("BLOCK-NAME-PLACEHOLDER") +``` + +!!! info "Registering blocks" + + Register blocks in this module to + [view and edit them](https://docs.prefect.io/ui/blocks/) + on Prefect Cloud: + + ```bash + prefect block register -m prefect_datahub + ``` + +### Load the saved block in prefect workflows + +After installing `prefect-datahub` and [saving the configution](#saving-configurations-to-a-block), you can easily use it within your prefect workflows to help you emit metadata event as show below! + +```python +from datahub_provider.entities import Dataset +from prefect import flow, task + +from prefect_datahub.datahub_emitter import DatahubEmitter + +datahub_emitter = DatahubEmitter.load("MY_BLOCK_NAME") + +@task(name="Transform", description="Transform the data") +def transform(data): + data = data.split(" ") + datahub_emitter.add_task( + inputs=[Dataset("snowflake", "mydb.schema.tableA")], + outputs=[Dataset("snowflake", "mydb.schema.tableC")], + ) + return data + +@flow(name="ETL flow", description="Extract transform load flow") +def etl(): + data = transform("This is data") + datahub_emitter.emit_flow() +``` + +**Note**: To emit the tasks, user compulsory need to emit flow. Otherwise nothing will get emit. + +## Resources + +For more tips on how to use tasks and flows in a Collection, check out [Using Collections](https://docs.prefect.io/collections/usage/)! + +### Installation + +Install `prefect-datahub` with `pip`: + +```bash +pip install prefect-datahub +``` + +Requires an installation of Python 3.7+. + +We recommend using a Python virtual environment manager such as pipenv, conda or virtualenv. + +These tasks are designed to work with Prefect 2.0. For more information about how to use Prefect, please refer to the [Prefect documentation](https://docs.prefect.io/). 
+ +### Feedback + +If you encounter any bugs while using `prefect-datahub`, feel free to open an issue in the [prefect-datahub](https://github.com/shubhamjagtap639/prefect-datahub) repository. + +If you have any questions or issues while using `prefect-datahub`, you can find help in either the [Prefect Discourse forum](https://discourse.prefect.io/) or the [Prefect Slack community](https://prefect.io/slack). + +Feel free to star or watch [`prefect-datahub`](https://github.com/shubhamjagtap639/prefect-datahub) for updates too! + +### Contributing + +If you'd like to help contribute to fix an issue or add a feature to `prefect-datahub`, please [propose changes through a pull request from a fork of the repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). + +Here are the steps: + +1. [Fork the repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo#forking-a-repository) +2. [Clone the forked repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo#cloning-your-forked-repository) +3. Install the repository and its dependencies: +``` +pip install -e ".[dev]" +``` +4. Make desired changes +5. Add tests +6. Insert an entry to [CHANGELOG.md](https://github.com/shubhamjagtap639/prefect-datahub/blob/main/CHANGELOG.md) +7. Install `pre-commit` to perform quality checks prior to commit: +``` +pre-commit install +``` +8. 
`git commit`, `git push`, and create a pull request diff --git a/metadata-ingestion-modules/prefect-datahub/build.gradle b/metadata-ingestion-modules/prefect-datahub/build.gradle new file mode 100644 index 0000000000000..9502452272c1b --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/build.gradle @@ -0,0 +1,104 @@ +plugins { + id 'base' +} + +ext { + python_executable = 'python3' + venv_name = 'venv' +} + +def pip_install_command = "${venv_name}/bin/pip install -e ../../metadata-ingestion" + +task checkPythonVersion(type: Exec) { + commandLine python_executable, '-c', 'import sys; assert sys.version_info >= (3, 7)' +} + +task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { + inputs.file file('setup.py') + outputs.dir("${venv_name}") + commandLine 'bash', '-c', "${python_executable} -m venv ${venv_name} && ${venv_name}/bin/python -m pip install --upgrade pip wheel 'setuptools>=63.0.0'" +} + +task installPackage(type: Exec, dependsOn: environmentSetup) { + inputs.file file('setup.py') + outputs.dir("${venv_name}") + commandLine 'bash', '-x', '-c', "${pip_install_command} -e ." 
+} + +task install(dependsOn: [installPackage]) + +task installDev(type: Exec, dependsOn: [install]) { + inputs.file file('setup.py') + outputs.dir("${venv_name}") + outputs.file("${venv_name}/.build_install_dev_sentinel") + commandLine 'bash', '-x', '-c', + "${pip_install_command} -e .[dev] && touch ${venv_name}/.build_install_dev_sentinel" +} + +task lint(type: Exec, dependsOn: installDev) { + commandLine 'bash', '-x', '-c', + "source ${venv_name}/bin/activate && black --check --diff prefect_datahub/ tests/ && isort --check --diff prefect_datahub/ tests/ && flake8 --count --statistics prefect_datahub/ tests/ && mypy prefect_datahub/ tests/" +} +task lintFix(type: Exec, dependsOn: installDev) { + commandLine 'bash', '-x', '-c', + "source ${venv_name}/bin/activate && " + + "black prefect_datahub/ tests/ && " + + "isort prefect_datahub/ tests/ && " + + "flake8 prefect_datahub/ tests/ && " + + "mypy prefect_datahub/ tests/ " +} + +task testQuick(type: Exec, dependsOn: installDev) { + // We can't enforce the coverage requirements if we run a subset of the tests. + inputs.files(project.fileTree(dir: "prefect_datahub/", include: "**/*.py")) + inputs.files(project.fileTree(dir: "tests/")) + outputs.dir("${venv_name}") + commandLine 'bash', '-x', '-c', + "source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" +} + +task installDevTest(type: Exec, dependsOn: [installDev]) { + inputs.file file('setup.py') + outputs.dir("${venv_name}") + outputs.file("${venv_name}/.build_install_dev_test_sentinel") + commandLine 'bash', '-x', '-c', + "${pip_install_command} -e .[dev,integration-tests] && touch ${venv_name}/.build_install_dev_test_sentinel" +} + +def testFile = hasProperty('testFile') ? 
testFile : 'unknown' +task testSingle(dependsOn: [installDevTest]) { + doLast { + if (testFile != 'unknown') { + exec { + commandLine 'bash', '-x', '-c', + "source ${venv_name}/bin/activate && pytest ${testFile}" + } + } else { + throw new GradleException("No file provided. Use -PtestFile=") + } + } +} + +task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) { + commandLine 'bash', '-x', '-c', + "source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml" +} +task buildWheel(type: Exec, dependsOn: [install]) { + commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' +} + +task cleanPythonCache(type: Exec) { + commandLine 'bash', '-c', + "find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" +} + +build.dependsOn install +check.dependsOn lint +check.dependsOn testQuick + +clean { + delete venv_name + delete 'build' + delete 'dist' +} +clean.dependsOn cleanPythonCache diff --git a/metadata-ingestion-modules/prefect-datahub/docs/concept_mapping.md b/metadata-ingestion-modules/prefect-datahub/docs/concept_mapping.md new file mode 100644 index 0000000000000..b6d405596e733 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/docs/concept_mapping.md @@ -0,0 +1,12 @@ +# Prefect and Datahub concept mapping + + +Prefect concepts are documented [here](https://docs.prefect.io/latest/concepts/), and datahub concepts are documented [here](https://datahubproject.io/docs/what-is-datahub/datahub-concepts). 
+ +Prefect Concept | DataHub Concept +--- | --- +[Flow](https://docs.prefect.io/2.10.13/concepts/flows/#flows) | [DataFlow](https://datahubproject.io/docs/generated/metamodel/entities/dataflow/) +[Flow Run](https://docs.prefect.io/latest/concepts/flows/#flow-runs) | [DataProcessInstance](https://datahubproject.io/docs/generated/metamodel/entities/dataprocessinstance) +[Task](https://docs.prefect.io/2.10.13/concepts/tasks/#tasks) | [DataJob](https://datahubproject.io/docs/generated/metamodel/entities/datajob/) +[Task Run](https://docs.prefect.io/latest/concepts/tasks/#tasks) | [DataProcessInstance](https://datahubproject.io/docs/generated/metamodel/entities/dataprocessinstance) +[Task Tag](https://docs.prefect.io/latest/concepts/tasks/#tags) | [Tag](https://datahubproject.io/docs/generated/metamodel/entities/tag/) diff --git a/metadata-ingestion-modules/prefect-datahub/docs/datahub_emitter.md b/metadata-ingestion-modules/prefect-datahub/docs/datahub_emitter.md new file mode 100644 index 0000000000000..407396b30c274 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/docs/datahub_emitter.md @@ -0,0 +1,2 @@ +# Datahub Emitter +::: prefect_datahub.datahub_emitter diff --git a/metadata-ingestion-modules/prefect-datahub/docs/gen_blocks_catalog.py b/metadata-ingestion-modules/prefect-datahub/docs/gen_blocks_catalog.py new file mode 100644 index 0000000000000..7e406129028d1 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/docs/gen_blocks_catalog.py @@ -0,0 +1,103 @@ +""" +Discovers all blocks and generates a list of them in the docs +under the Blocks Catalog heading. 
+""" + +from pathlib import Path +from textwrap import dedent + +import mkdocs_gen_files +from prefect.blocks.core import Block +from prefect.utilities.dispatch import get_registry_for_type +from prefect.utilities.importtools import from_qualified_name, to_qualified_name + +COLLECTION_SLUG = "prefect_datahub" + + +def find_module_blocks(): + blocks = get_registry_for_type(Block) + collection_blocks = [ + block + for block in blocks.values() + if to_qualified_name(block).startswith(COLLECTION_SLUG) + ] + module_blocks = {} + for block in collection_blocks: + block_name = block.__name__ + module_nesting = tuple(to_qualified_name(block).split(".")[1:-1]) + if module_nesting not in module_blocks: + module_blocks[module_nesting] = [] + module_blocks[module_nesting].append(block_name) + return module_blocks + + +def insert_blocks_catalog(generated_file): + module_blocks = find_module_blocks() + if len(module_blocks) == 0: + return + generated_file.write( + dedent( + f""" + Below is a list of Blocks available for registration in + `prefect-datahub`. + + To register blocks in this module to + [view and edit them](https://docs.prefect.io/ui/blocks/) + on Prefect Cloud, first [install the required packages]( + https://shubhamjagtap639.github.io/prefect-datahub/#installation), + then + ```bash + prefect block register -m {COLLECTION_SLUG} + ``` + """ # noqa + ) + ) + generated_file.write( + "Note, to use the `load` method on Blocks, you must already have a block document " # noqa + "[saved through code](https://docs.prefect.io/concepts/blocks/#saving-blocks) " # noqa + "or [saved through the UI](https://docs.prefect.io/ui/blocks/).\n" + ) + for module_nesting, block_names in module_blocks.items(): + module_path = f"{COLLECTION_SLUG}." 
+ " ".join(module_nesting) + module_title = ( + module_path.replace(COLLECTION_SLUG, "") + .lstrip(".") + .replace("_", " ") + .title() + ) + generated_file.write(f"## [{module_title} Module][{module_path}]\n") + for block_name in block_names: + block_obj = from_qualified_name(f"{module_path}.{block_name}") + block_description = block_obj.get_description() + if not block_description.endswith("."): + block_description += "." + generated_file.write( + f"[{block_name}][{module_path}.{block_name}]\n\n{block_description}\n\n" + ) + generated_file.write( + dedent( + f""" + To load the {block_name}: + ```python + from prefect import flow + from {module_path} import {block_name} + + @flow + def my_flow(): + my_block = {block_name}.load("MY_BLOCK_NAME") + + my_flow() + ``` + """ + ) + ) + generated_file.write( + f"For additional examples, check out the [{module_title} Module]" + f"(../examples_catalog/#{module_nesting[-1]}-module) " + f"under Examples Catalog.\n" + ) + + +blocks_catalog_path = Path("blocks_catalog.md") +with mkdocs_gen_files.open(blocks_catalog_path, "w") as generated_file: + insert_blocks_catalog(generated_file) diff --git a/metadata-ingestion-modules/prefect-datahub/docs/gen_examples_catalog.py b/metadata-ingestion-modules/prefect-datahub/docs/gen_examples_catalog.py new file mode 100644 index 0000000000000..c8f82614e1c64 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/docs/gen_examples_catalog.py @@ -0,0 +1,120 @@ +""" +Locates all the examples in the Collection and puts them in a single page. 
+""" + +import re +from collections import defaultdict +from inspect import getmembers, isclass, isfunction +from pathlib import Path +from pkgutil import iter_modules +from textwrap import dedent +from types import ModuleType +from typing import Callable, Set, Union + +import mkdocs_gen_files +from griffe.dataclasses import Docstring +from griffe.docstrings.dataclasses import DocstringSectionKind +from griffe.docstrings.parsers import Parser, parse +from prefect.logging.loggers import disable_logger +from prefect.utilities.importtools import load_module, to_qualified_name + +import prefect_datahub + +COLLECTION_SLUG = "prefect_datahub" + + +def skip_parsing(name: str, obj: Union[ModuleType, Callable], module_nesting: str): + """ + Skips parsing the object if it's a private object or if it's not in the + module nesting, preventing imports from other libraries from being added to the + examples catalog. + """ + try: + wrong_module = not to_qualified_name(obj).startswith(module_nesting) + except AttributeError: + wrong_module = False + return obj.__doc__ is None or name.startswith("_") or wrong_module + + +def skip_block_load_code_example(code_example: str) -> bool: + """ + Skips the code example if it's just showing how to load a Block. + """ + return re.search(r'\.load\("BLOCK_NAME"\)\s*$', code_example.rstrip("`")) + + +def get_code_examples(obj: Union[ModuleType, Callable]) -> Set[str]: + """ + Gathers all the code examples within an object. 
+ """ + code_examples = set() + with disable_logger("griffe.docstrings.google"): + with disable_logger("griffe.agents.nodes"): + docstring = Docstring(obj.__doc__) + parsed_sections = parse(docstring, Parser.google) + + for section in parsed_sections: + if section.kind == DocstringSectionKind.examples: + code_example = "\n".join( + (part[1] for part in section.as_dict().get("value", [])) + ) + if not skip_block_load_code_example(code_example): + code_examples.add(code_example) + if section.kind == DocstringSectionKind.admonition: + value = section.as_dict().get("value", {}) + if value.get("annotation") == "example": + code_example = value.get("description") + if not skip_block_load_code_example(code_example): + code_examples.add(code_example) + + return code_examples + + +code_examples_grouping = defaultdict(set) +for _, module_name, ispkg in iter_modules(prefect_datahub.__path__): + + module_nesting = f"{COLLECTION_SLUG}.{module_name}" + module_obj = load_module(module_nesting) + + # find all module examples + if skip_parsing(module_name, module_obj, module_nesting): + continue + code_examples_grouping[module_name] |= get_code_examples(module_obj) + + # find all class and method examples + for class_name, class_obj in getmembers(module_obj, isclass): + if skip_parsing(class_name, class_obj, module_nesting): + continue + code_examples_grouping[module_name] |= get_code_examples(class_obj) + for method_name, method_obj in getmembers(class_obj, isfunction): + if skip_parsing(method_name, method_obj, module_nesting): + continue + code_examples_grouping[module_name] |= get_code_examples(method_obj) + + # find all function examples + for function_name, function_obj in getmembers(module_obj, callable): + if skip_parsing(function_name, function_obj, module_nesting): + continue + code_examples_grouping[module_name] |= get_code_examples(function_obj) + + +examples_catalog_path = Path("examples_catalog.md") +with mkdocs_gen_files.open(examples_catalog_path, "w") as 
generated_file: + generated_file.write( + dedent( + """ + # Examples Catalog + + Below is a list of examples for `prefect-datahub`. + """ + ) + ) + for module_name, code_examples in code_examples_grouping.items(): + if len(code_examples) == 0: + continue + module_title = module_name.replace("_", " ").title() + generated_file.write( + f"## [{module_title} Module][{COLLECTION_SLUG}.{module_name}]\n" + ) + for code_example in code_examples: + generated_file.write(code_example + "\n") diff --git a/metadata-ingestion-modules/prefect-datahub/docs/gen_home_page.py b/metadata-ingestion-modules/prefect-datahub/docs/gen_home_page.py new file mode 100644 index 0000000000000..334113414ed1f --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/docs/gen_home_page.py @@ -0,0 +1,21 @@ +""" +Copies README.md to index.md. +""" + +from pathlib import Path + +import mkdocs_gen_files + +# Home page + +readme_path = Path("README.md") +docs_index_path = Path("index.md") + +with open(readme_path, "r") as readme: + with mkdocs_gen_files.open(docs_index_path, "w") as generated_file: + for line in readme: + if line.startswith("Visit the full docs [here]("): + continue # prevent linking to itself + generated_file.write(line) + + mkdocs_gen_files.set_edit_path(Path(docs_index_path), readme_path) diff --git a/metadata-ingestion-modules/prefect-datahub/docs/img/favicon.ico b/metadata-ingestion-modules/prefect-datahub/docs/img/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..c4b421585b5f5cbbb793df9d0f0c7c09341d5989 GIT binary patch literal 15406 zcmeHOX>e256~0c2jl9b%wk1pMk4}DM+RU^~NR!qfl;D)rq-h{iy3Hg_r<0_ehA^Q7 z8Xyi~Cln}wn1p>Z`%c&lW-(v`wj@fRkW6WrP9Qj7AwTK8)9+mAxss4&$(C%Vt!Cal zE$`m*opZl?&b{Yc36UV~7nzv?cdK~mYeGCMgs@t@pC2#^QHk#!ddT;ov^1M& zP6p9iDbck*V)D@TK13^_!vg3~k?N#pF?bQYr2La+5A7`=TAxR>B#S)L(ly5bafF77 zAGFtnY+rBj(7__2&CubaZ1T*>(60SRXjCq*J?Q6=w)#Q)1UhdyNi`;oc0z~Mx#XFj zsoffawj*F3zRHD4;|wdq_HnFm2z00g7c|e}EUjg@(3;T4XTMJJ+}_;BL-vvzZME0G 
z=P{?MoI&bRuMXQ(^b%})V$O+Y`~&Tc(G=S`)d+JyQ}_G7ailkIG#cKH*kk4U~_ zKV!x91TOYZ(Y$Dp=tb*tuhKH(SiH}Orfn>qrPGkVAy1L>wKD5pv@ml5Rfz0Z^c_hb zQvRlVh1S4leF&fP7x=BY88>NKN(H?w@+0Y^GtWr*TOfZO{0;lv#qiziduOD>|E4t2 z1j9^vU6?u}BV1c4e|sN=s^G6y!3LGE!9v(zHhk|?`0qcZ5WQizLx1QsC(<|&PG0bD zQvR+2Mf%INumOA~y$>6_2OCU-4JIQ7{V|#7wWJ&LqUaUezO$dD{Jn(=ZG*qv0QoC0 z2JrhdAM$5Heg))DfDOj>j-UMfkiQc#XCuY{K3~Q@<{`(wcQFQULH;;H{Nx`jQfN;h z(H7W%dAJNV;4zp78%&1{roaZ{<0!w{q|nEZ&vCm7@;P>Y03IxW4Q9ax?=TOO=<>921_{l$ER_L&aXfJHA)i(ygzFmdKQndG`@H^zY1PP~Tn^94#pns&HU zXf4CA{uw6r+BXL53wZt4>I;mdOij~{_Y;~{?zavr7dvT? z+zNyf5%xE%`Fc^KRiQH$q8iwM#~_3+P^PDA=YHt74%F9!!-WU7r~-~}g%#Ask5YfT zwT0>-{{-Y8g8V%g1AkwDn54|i&@K%OAz#N)E)?KcMT6zGrwwVLzb3n76xO{qjDbc+ zFb4a4zJUF)&lf0INB@J@(Ym~#yNdO-`f8$+HszkN>deLxV=Qhb*Urrb!2ip4i!94y9*lN3pB|?_yYGrqv|df z9+r7`n4tP6g?8ZiME8}|`HE`uTVHolek%^_KWS^ub5sR;uFBQ0pL1U>6Yfoq2yNwv zMEB)jxpG}E*VmoWUze}nc^^z`b4J}=WIVU*nIz@Xs(g2VzPeof9wYsA`FtmP;iF`; z>t>K?NR8f%hm54(4Wp1eUmz> zuP(RsZv7Q0`{A&{e>CpfM?kESU;w}nmZI1f%!gin1@DyKZ7H*^sA zH{?07*Ywb`?8{UMJ(gtjL#*xn6@V`!{*A!TdsSYeFU|JQV&mtuFntrvH1vf(h$!FO zUAjvAoAaG=ZO1tm-sf=6f%7i&((ll$)a6vs`%#QXdUs(ACs*R%0{pyBIDhc|)$4Fl&fKck;_ub-9p_Z2#2 zo`LuI%puM@a^86F^9NTtj7X5kKumr82Gz+okz@{8sO)AE9X0z zLp+b~Sx@-I0_Ko67W54KCySjjXU?^L&fDu8;#jcKFBWhOgVz&0kG&k~8Td~D|1r+@ zVhp(U!egMv0$$VUv4CqJdMuchM(UK*YqtgooOtQJm*mfBi&NGNbPjRdfH|bEC)$Vw zEfZ5b$DR;pYfHsXoBDTMo!Q@75`P`=pG3`p>n@ygXAbGHKwnP;!~*zIaLDymidtPN z)U!{DCS{n|i1Q>PsZ`|n_aE=~68~A?KMnlMp~Jwx-^Zb?h-ESsfJ4jSg97&j+*ZmH1LcOU0__=1ob*ma5hwh05SkLM6*cS_26=`ZsKcQZKItag4 z2R^5C9b-~SLqr*Pl^PE&N&L1FC!MnrorVrv`)hX|!~R&)xu&M6sKcqZ2K(x8L3JSD zXB|}Na1+;7T;Gnh4w66il4gtnQ61OKeDgRYhkUU>t|yG-dN)n2eNw1*hqi$~SRbAr z!^I-j1LF~s{EVzI*-G}H?&zemz<-K4ZU_=dr#osPyg&T+`Fl`u;-o437}M zmscu{O!$Omq`Vrdv7km%7Bv?C82D}QLz=8zN)Evfas5)p0^t8oj!2%Zam`3qFAfxH zz}yYCx$iSSgPNPF{4Vh}ZO^|=^~Kd#JH<4&MD-b&R9Ey0oiNoQ7BtHmc$iq=nw_Dd z4jK|a<3$~mno+0xVnl*^?WIKJ=BnJgYNM6U{NW59&Z5N_KaVHoa{`^nFQKCaFVmsI zLx=^g_55!Q%9X`AO7jV`J23Yi 
zAN)Ee**0`6J&X3`K0~|v)MHJj2JZ{p^D^Da2-LcPpK-fTi?#I^wBox$+wfds%craJ zT3mI-K6?k(ZsLny*F%oUR*_8G@?N0Ld9}1Y_YUTsXrW@-R;VYD(RQU+TbTP&XOPv?2R9;Lt%@k==ZEzR|s|uW--%PJ+5-w84FT zL5_0gcrZ_baqFME&i(;o!SiDsc(o#9MAN*?x;3McTz^{yo}97pc>$a&^{e0aAo~aC z;K)v+hJwe`bLMT*pM=G!+dKH)_4cR6+*Gm`4?;b$64y1@S`E+fV9frXyAHps`!yDz zE?m3=xNk+y5r4eF@8gf3m?w_H@8L|(8sK)t2Dh$9{P2h1Nzj>f9YbgCvBxiCFZ0A^ z*$#VufwLl=;CFkv#STC16iL(oo;Z9w>5xBZ4{y-#e5OA}>mStrAB4>t;%jdoeid=7_B8#8?WpJ84_?u+a=1M7efvwIpoF NR+s-j|KD$ce*-+RrwsrA literal 0 HcmV?d00001 diff --git a/metadata-ingestion-modules/prefect-datahub/docs/img/prefect-logo-mark-solid-white-500.png b/metadata-ingestion-modules/prefect-datahub/docs/img/prefect-logo-mark-solid-white-500.png new file mode 100644 index 0000000000000000000000000000000000000000..f83aa6ef6a34ee4c596bd1c7c2046a2f05cb9342 GIT binary patch literal 16294 zcmeHui93{U-}jhCO)18bC1MPr#S%*PVJu}=wkQgrjY9TqO7<{Gq6|{fC=!yag=j;I z?35&z*MTDY)qN19Ds@i@vLgtHu7KVvdv|0_|Io}kKi0N zJq^D$Upjo;2mWec{jjC(g9Y!F5u!N)+~fZfEs+f zc*gfMHsIm~Pam}aP2BnjHTcYWERVyk5Ai*(iL*1cz!JQ?&tMPADaa|{w6&-U$}1|$!U$QPKu_P(0kWPx z5`P!@*E)J8L+<(t zre3aR-2(Ma`<~Ha=~z}lRd&Ck)xUK7clv)B^IvAOs8TzA^o)(Qks16%w&UbL=oI}*+^^EZ*zUu4)7d9+->mfbbgt)9wDR+@ zJbS{e#(4F%9?3tCRyN&`S3LdeZBplSkD1A{!@B#AoOOOEb#{LqbyrHB_>ItChZ0>} zc$)$j0;ej5=aw}6zfT9WeC#h_lAD4;ZkJBNCt7-$R;}uiPx$%oWYyn%TTs|!4Ps_y z<~2$*!EkPQf#UvnwRr4uyL`dw?J=(Q@2U@eAFEeP!bBiYSOgpTqaI#YE8AXRVbosB zkgn5SIKx&=>~xP4s=WSp0F662xl(V{;?rQ$Tu6a^|M%Q9bE^+w0kwOa2Xf!@qVWhM zCZbLcCHbQUR}|kf!`bM}#DuJby3@1;Uu6$(ZQLZNq01VBKt)Mn-+AuwzUb_9Hm*-7 z_!uoZingie>fe3%j^M9eH1p#aZoW$xrzZqwRR~nI-7Z#X45MhZQY&ytc(rY8tlfmU zlkt^`GmEa}L#os4{gZSXC719lE&581M9^eTfFc7CY512aya3zg5@Rai47I!Q%hk?G zWW)Ghwu$}*1<;SP^P4_*54QvJbifsvoX)~z-xoYn%h?w@zg>27@3 zm9>_ql_|YZ%x9pXZn(q0G~~?(J~aN69TFXCN51g%67uHl-y!)5q>kL0Ty~BgkvHL1 zp6x#ID{`7RKSDPC6bc5S@-IBsK*|a&2;Kj)qy3)J%o$?;mo1`1^ng-r!SJvI<4k9O z7;>B&9qut6nqjV!eF+&;)xq{7CxELwdP=yVpF?=B&Zz^)pIjHp%!?1Awn*2tExz9r z5j~h$EWt?qJk5rllZa?ry6h~Artx^ZYc{?^JLo(2t}|NdJp+qHqBH9eNKJGyi5XGa 
zy~mYSFztL1y>C}UTN=*!`pwOyu2d60VMYX=RzKrZL-HHtVv)5{g zohq;2yZcB+wD~0L3AH!M-X*cNcd>1pvN+Mc@t*jQ?^8O(;fPjgY-WPyJBu$P#c1qY zar5&&na%q!3-bA|2W$@PAQ=i(R~sCN9(>oyZ2R-T6{w?*2z+DS&Y^o*Go1?Jn!jpr zb}RZdklps3rQ=bUMLf34<-bGK z@x&C7Q*eQA@wHw46ryO{;^;K5Q=Nv+fdBhSllEk-raEok>h{= z=Z)Q)lS$@B?uOE_5uL$!=hT;=h?j$wC(hgJwW!VvCSJ`~?=_^6_t!ohD9eg@C9NzM zj6`cLVVb&Lvq%6rd$b=xUUUwx?0<#F)ixE3+!;b1kUl;=WdaH@`C;65(bxc1I?5HL zywt~6nKqDPFQ6_IoKb9A(~j0$z~p^qQ=CyA)Y}`(J0jty$mlDd<=ZHZB?Z4+ zPIg;X8)qa8D3qERXrWIZyeSCdq+QJSe3kOT!MW#7(pox0j~tI0G~^&S3LA_B zHv0VOKg{y(dwYZ<>qOUy8>nwg(FZx>)Yxk~Zj!>|FICyJ3q=0(r?QI)`-bu%Pde8g z7Fw+CcR9cZX~Oc)aIs}1*-&KDxPE-IdB@w+EZ5p{K_#hA=@J{t(F zXME6_X@@;Kx%6tp@mie3)0>qWYaV_Ycwn#H)MByx>|@Hn4qbDt$h%h%B(Jwg=7dwO zNK^0|PuCy2y+m^!iBxavPHa|*XfhO6&?L$gtF#se+f+R?-<^!mmESiaiwh@Xs_f}$ z)~be%ozr;#6p6N7j3En+l;N8k6GPh!M(!$^ICJd8tsOtGnr2m7GKNa&yBQ;OBeHJy zMdwkBL)Z>IS?jX0<7r85<`@-z$ zbc#FWRWd1yT33r{zJWhRJ1L}|GVC2+wRo*fY4f@G=1kIB0;QD{Zr*MhAHqCZ*sjQh ziNKS=JTiS~LXBcM(xM;vNe-QBHgs*u>J3V;D|I_s6l)>K*5GJS#~pP4wI_Fl%fgh^ z%4~^%Z68je2>%B`e6tzLKyo4?$)}Qc&DLkUK6iYIUv_P;=4FG`=P5-rvV^-w_0`uK zP}paN*}MJ1lN16yxy-xGF+sZ1DKA4Edxxu*4MzvkS2;AREa|@;ZNw$C-x0&b5yVE2 z$W(?#lPaT8BsWMP$8dWh<85io@}tr`{nhGrX-uHHnj;i8{Jb8#OYMs>;ggJPZOW9G zoJGl<)5=r0IS<|9TJc4K8r7B_T^4-NO}5r3-s@=Sbtl`dqgS^2cg%7?K1CqUV?5$k z&R+brqg^5-2?+?xNPz%7D(kaD{cEeYk<>-3J={{exRn{X)Yh$Wr$TNDrq#Ox~!&LyyLiP8%n! z9W{M*y#07ZxbdVB5lR}&24Y@kJyo?eF8uI-Q>EEwpCZHbI|$t@!GNC0Wy2rZi0vPi zJlh2d>_@g1b0FD~n3%-67xq-AfymELbCKb29Z1T7y^dL_2ExeDH*eG zC`Xq0WD`5vwm^NN%~n~j=r1*Sn(@_2{^rN>(NN)zP14E#!XX;4_2yYx<&nNxb!Nj$?Qf65b=wKYMZY}%=3m73$Z%b_cAF!) 
zox;N{-LMgD?h*ZE#_xJddS&U%vzfJz7o{n;UzPYb$e}^1*dV04-oP6Z;g~iFjtjf4 zI5Fs+?P6B5t7T)u*5l61t;g2a6Wsh~MfsQ&D_sao*x(S8drC;V;Ks3t!pt7HzYo>s%C}l zY>`^qM-MP1Na_wuYxjK&7mC-{JjeU?0>RCl+)Ekv76e(Q@(rpX6iy~Km{u*v_?6n3 zS-ci(Js?TUnMXMv;T4B16QJHn()Wv;coEYlwDh|>RmpEHY(>X9J~JmkC!1B`%fv#d zpwKPr9ByB9GOuuc#~=P+B^P%Yt=EUXy0ToN0D~VXXp6hh$L{RqMoTPlZ0n2tG#Sy!bulu$}>=b}54^5glJ#;v)74W{$mT+G3wL6o`oTKPNIexoiaT zkKZa@(xRD=KZyUCV*~2rASlc9@+`eLw}L!yr8sz|SAtT*Rhh)$*0^r85%@i@*&fo4 zXPIRQnkSBacV<#m*Lo+kABP=JJwAOE&+>)jEo_lwqXz~a?(vJw^3q3 zQIi*VSY41HBq~8G-X1r2pVJXW=zG@ru9G)j`V)kl_rT_dgy8c<=bgDyhW9a&p%}I~ zSA_zMVNIaz1zU4@jYM@-qivBqn=cGOM${SgW9)XHjA}0lJ{MJ#sDX~-i-SkG5 zQb$3lqv_J+P3)|YeKTHa8+0JvP$v7_prKbNXJ|i%Jx|ZviymAn8FQg5_g+LX{t~GO zrH7Ds|A1k(6hk4fg)r^)J+7rQ+;sb2_l>B#1zg-JRqC<*V6kjV@2(ij1Dzeyr~MUz z0y`KT`|pp;&o_(1w-vjK`o7x4hSoDgvsAU;qQ-)Wg#Zdsw=LrpDD<`p^Ff;NL49|k){rzAPr!70%MFjoNt~{rO~77em@;^c_)n$qNR~i+2-PL4Q*OKV0+J<(K6WUUE*!Wni@Y z%y*VUCToL%geRnop9TZC-w`sK=JqxWU$oP<&#W*-S&*g8IuLFhzN_~V%DzEV(J#JQ z7Ivur>23^MIRjVDYoT9;+8||}L0o%xJ3{Ab1X0X5#(OCA0ymh7Vv=g# zH3W(Qye_5xF}jaZYpC&<5vg@CSN^UnjKp-BBXXwP-|UU$9l->L&h)CZ>pLC2Jy0wy zdOm}T4Xt5}$kG0A)jgft4#$Z9*4Jb`-VrD5(n=G|R$q@2^1HjQGs^U7;ot$s4}}mj zUV#&EXc~+hM0o=tJ~6n^5&dj^TGF?Mu$+TmK6X~joREM11jUAKz$0>+Fznq5yMfcS zxE-HAcCIjg?rq)Ar+qfEQwYidPPWJz&I4L0>?~36W$F8lqEPRlnfws1s_BUVf?luy zfQYU^;if8wI|B+aT6NmrY2_olHn+TtLeSPSf;ZGi2Rf)+ZWjYAtTvU&-1J7$GvVsy8)XOEhQ_X`l(H}D>4 z|8!5Gfo5&P9!A;VIIH+a|G2OLlnDVr6@yjaA@Ub%Kj^sHDc|mSS4f>OEzx-OlWsQj z`_+*$Uz?6^_KjK5&bR!O0A*c?L0l#Qdl4*YV*c}DB-%Gd)0IZg31}c&oH)A&SitO~|Lym(vjhFmN!F0ydZEak&9`1@jAXngF*gZQAWD55P1`Jgy9-)2}p8@kTF6Xwh z|D#?Qed$D-KH3-h&XcYIJCP&|*lCYN)fzcnL*PiS$3|41R7Be&sg_mjGbykWfCOG7 zWbSQ!`Yt0$2dfWkHRgXFx(^(K?=jiWB|=?%KS@x{38K5B(SuQUKajtuBIDBUim%@; zgWgWgjbi@gE?ZDHanJBHc}v6o2aDU=7cV-z>r9DLxN#k?4Wbm2{(codhuY_Nz}xV5 zvT(ugP4qvO?XCHaj~8F=aTY*rL9vn@3oqD{7$F%(p@@W?GNwn8YmMl?kkdNtX(MGF zinvMCR=CBR0rO^`AIR%-KdZAI$8`*no4vLt*xKO9pT}F%zKPjrIusqJ+2(8I z#&xn2kwg&?p~(HiVsR3uNR?lCN|j~~lDtMQu1WRn`sl|DSU&x9O~1Wkl#Ycq^rXr1 
z24X2VN$D@caSoVJ@V<#dKTOwd-*TVN=ysfO-x=d9ZM)bNViY*Ax#FM`S`u1nX`zEV zPVOL*asD>_d<$AuH|JlBO=olx-mV^f`q5%p%ee|cR0c9w4KK&8wqsOriGiI_Q}O3w zJ2m%`bLMLlj&mWoA(nEZh*J1$Gd$0fQrD-hfTDz|nu!;~JDs^o01oNNp5B4Nfv_aD zZZX}H7NO+Lbi8>|x$EN7gb<;2Rvh#cI@BMHmjnSx60?QQ@Y-GWsMhMbcZspM1qH2Sn}yes(?_H$udi^@J6Yzsjs4C9{uBWMFd-sD1R07xB*6^2IY>?E zTP*cTy`sii!5aHq4rKuG&M@M=lsE=GCt{!VTO`oq*Q+F1=OT0Rtc8Xx*+D=q>ss9o zh!Z>7=FzCz+;*1Ky(cklT{VxwYV~Pkjp3YtLJgHG3Y|`$fzi)DAI%bYM|RK1NL6m@ zV{#S;Lxuy(-nK1sLMPh*k<)SN4|pObxgmv2dtsI(5b8$ktncf~AWZkfzi0}E6!UK@ z-OJ`q4I}Uvhdp;Vzey$FGO_cnP-Sg&NI^&DHb3ZTEi}Lzmc^t>K`};O#Ym+!6rvm_9X3>T$qc75zo-Ng!&0d#->Fb$5v=s`Y z7L7y&VJ^KGYnOcZB>H=MYu^+~bM;EQO1_p;s8%86QEnXc<(i2E8)6*}B+I?Ce{V(v z%4X2DJULd|{I#|@-DmJp;<;lq;c$Onh({dX=BpM@TbIZBusj9k2(MYnbsw72!CE7E z>;Q2x4tdj{+^P|7PcM2bYN@$Lsy06;D783Alh3QT)`Y&px>Xh-T*f#BhR4J1cp^oVhPN$xmdyrubCvr|sHkiD0Ju*YRq{?@km(`oV-T*{>% zK=Sx$N)&ND^^d)mL*G$L%qrZ&xKSHFC-(B9HCJOwF_(zR!L*IU8*4FNMx>s8%&dz8 zyf14qVq`BvyhuX;YAGu1dA*X<*{Lgev*o>5jVunkXdm-udD)TuKlM zlr}~56&ciZ-L1K|WadYp7W~Zgo=|NNg&gjq5;gcJ;owq6B-9TmCH1aIyaZqvp}cpd zL;b9ay_Pc{u3@6y`79@D*8C#!U&MpoByMATG8BJy3n8>{RIlPmn=OrCMs|_WO3RH7 zqk-N<(3U)ZlJkX!e6qs6&DiNbRZouONe#wjhzyv1Qv$i^Cu72ueSPS#qMR zxL;y^e|gtWug7JkHxIM0`~YnF!OO1W*dGbKt>>NIxKi0Tli~rO=qtatb1!s>VU~IU z@;e~)W?0<%j0qi`H1$jQSk~-Xu87kPkgsH=+v4cK(6|w;_L6TUX%xrYv3QXMZOSGq z&B4-KBVwH^?BJUJp^x<7q?rBid(YpZTU_QeVpbv#dd8Xo@|QaNHb$zaHwM^5i-|cJ z;1%7U_b48&(nhhZ`F`%2egSBs3uhywjhN=*8?PZhy|#FsgU=?Bl)7XieqP>!gxW1m zb1Q2^oXoSN<1pg*v*C)bSZ`+HJI}zkhjJXR#D;!M2>nd0IN#W^_U?yBu7_O;;?%bEtV1V|PfT=y_M1SDIvtYk9Ma-mLxRkE(%_76)#& z^HqlTzqJTC|1#k6c8HMM?XMU0G&iTyZ04tv1DJ%jTEAH^&A^udQsd+rW)lRteL`{O z%Mo!o3uw(K8OgSd;cOM@i#%IY8h*U@+}v_uyKkx_w>QL3aT9seGS;))R)?plwDAaC2{xBFMP%k3#nH%XYh~mUL*J z-uaxC3ENjJ)-fQHGQ355e$h`Xhq)`YD@apcw1qUqS??YdF^NC56M2#_%^dL>8@9@{ zjCQ&z^8LBj!&jD{8Dqnp=^EwSpQ^MI+GZ1{e1eAM1=_U;65+U+U9$Z&o+)D{!Ym=ewr2EwpHi`B+U@T7p%!N=YiP)aRI` z(PVl`YU{E{rS6E2h1jL6n|`6s?<&n;X9hYv!|-Kt-ikA9T_wBGPw-e*+SbM*#0_g( 
z=+A6_ZU5BGUvbXCAh9_&mS-EWP7A5SLMYtN-Pj#HIL?AF7}upI1fT()J|A;0L8Ey7 z^9iUMhIYhpY5NmsLZWeKO;YxYnB}aIGWGW5WKyt<{5^=VnxvOsJ`QgFm|DDv)g0#% zFgb>zAC0Kdl&maOiSp*P!=GwQ>Csm8s@~!IvEBe?VReu`X8;;9{&Q;doTBa&PHA>L zwYgq(hSz@8@BnE|^FUHU$dF)hFgk(7gb7)g&2#du-UvzmB~PHH#M-8yiTZOxo1h$* zmPp#?r@RTdfd|d1IU_852KaQ7B;RZ!sY|SJx34`OS|N@N*lvS7l_cq=ya6DDJLhGz z?g5a|i_#H2Cx8x>xq05x@C&0lkzn9ST6;FO);6?nR?L#+07U#x)X@9RYn@9bet zv;JZ^t%4h3d%`7aI*RpIH&DbZd{!;!Vw8;}bpsPWUQiSEQd}(;XMtI0V)YbNHM%+N z&clz8=vxtemA9(=OT0B}4%3-)H|4)B=2EoWhtkJSd#pr|kFoHxZ<`|%LeCY;zwcb> z`>l)gzFALQ!_B=TW%Zs;O7bdLmA6U5i;qIp$E||_kFI|)=kr&6f#w*)f<@41xw&s&v^N{m1yuD!Z(OCzR_y=?jIJNK~AXUsGTGj2x5V zm>-lfIL6da8)3fS-?_=L$m=sBB!RZt^g8SF%xE_wB~-gLsGb{35KH_`aai0PTIY1L z$tufseLsL-x=|TIOYx--$E~JrdxXE8Nxjh23{j)1Jn3Tx=YX4--pJkQ9_m-^6QOrz zP48MzOQ2>JW(zOxkZv{IhE8elbx>BYoOlfh=~rLWe)}f&6tmaQC%wpuTuI(%^s~L?d zUV2ngC~r%DGM{CAMci2Xop@|``sqx)J06m(5titA$}Kb6^4H^O1C}dS&J1TUu8OYQ z^)tdjKY=6OPha0e@)SvZiIX#+mh30QD&)z+g@>vfHxu{(BJeYPy4potjY5*gH0ZZ zdrMUd85ywWxbvC30^RT#!#0)rgBr2RnNm1Xut6nzZLqvgt--q_gl?;R-W(dh;pm8x z3mB2L-8XO2Dig>5J*+t{P^DC`q~FU#IdrQ z%)*zl-m+fSiXF#{CPAE&+DQKKvZbVja=A$-LE_sqbbq~2aov64SlZG~IQ za4<9Q+`4;C70ksen3^?NB)Si z-cN5n^p)XX+)AqKeK0m~b6}jEwTl*TDrWR#154X>NO1{fqpGv9y#pU=q~XkeTjZC!2;He0aobr(5Eu07 zaLfbk^~}ks@rtPk#%8ZFDeodfV%>Yx8)DharUk%PVHjL9;9J;4-yOdkzVtPwN`PQ9 zrh~{~Ml|eUiRC*Mqd4(LbE&rUhR-fK%9u&7P=(Sg)>{_mJ)6DN5G!)2{vt^h)Qyoz!5^b)sd$o>PE_bm#$MXQ zU2)dpEj0B_+3ydEW5n3p6K-8b+fi#c?C+FKc8!tYzj0Giu;vz&0ia}tuiqPpV|KE& z7Upd=uJeBJQPw+)YwgL{-Cf>mRi!*uZm zSX%uEld|~UQ{{T$5imy@v;aS4@joy7LhSoT|jVtbfcqpd1bA#QOWSjgSXz738(aN-!xdxJlHvLx0r#vW4AjHyl?5;zr< zkuO+-uXv6QgjN&O7?fLLw$-}y7`@dyJq>NIc3U_D{^)XJ>jY4c$I{%_?rCd^fH9S; zk~_88+#_8*TL`_&2pWU;&)Q_bqL4^a(vq}W2gMP4IsDy{AvF(wHl0;KMFA2LjeF+@QA2#Qvi@rF!LXdMcAZ{v39b)>$$J<7jk~&`sjP@x zYhjy|@3{N2_HWM=(p@+;UIG=Zm;}J_F#zK)FULimC=Lu)AbKsY7zh^s`!6l$H_|S?`y97@~bl0_i85%p% z7}~mB5dm<@A$SWkcbk5I0R~ZkWksW1tLsDTSvrOL*O(OeTakUZ~D??{8LaESugCHgASN0v2F-@)uF9jNEEis 
ztj_kr?C;~(n!_C+iD}P{vEm~}FcG0Embl9SoWcz3Y~^dopOr!3y!oK<8ln+q;l=ID z3aH75M0BL~OX9&MH~4&YGej9xqj|?^S(DJc#|FN9!!gIzS+;-?B;)7ng~L8v;D&&8 zon?;&zxWq2H(KykbmT$^mH-7x5YLogWFjJ_jzEC=8BFB_qt>R@7(;w4Y}^pji-%>> z@$&?cK$Qz%*Ud4ojF{!XzL}n0Lp`$PgHfZSU{FVY-Y>@7xgOCPUuj2OT?x2T7M2DT z!_?Q@_2Brf+4LInpL59)L-YfKY&mbP+;IE$lJ5h*5)V`PS96T0{8}s>6Cp0KsZ~be zaj1R6mlAt%wTARhJWTbz)u?7wb59vQ^hY>XX+>w9Bw(G9{DFe@wdSp@w$+x3%<zGj>tH;PC>##clIGCKEU3|^?}_cHpRB}n%>F8L%mV>T0~9sgaP>AG zyAk+_Y$PW!K`xbefY%3(s%if9jik$&(wqgPw8zNWC zNDzJYpDPtYO7NhMGUev(C4}gGzpw*I0E~w4#nl1R92xPDe8hHSBPve(2ZzqzBYa36 zhws5mjR<8(=8$i&r{Ea(7d&QtP6oQkI+#%~5lCrZ);?Q-PSz_$FfS!E?j&mzyu$h> zvfi^I%|=kbMx6&7>kA;*B3t&aPX%Q~KqZ5X@r7e<)+{*h+4y%=q!2JJgd)lPvo=Pp z>F=s|>;?##*RWsM|E`VD9Txm&LsuZi9i#W^pS7Xf*zk8l;#P2-tyd2V{IfPX)8&7e z`CrYj4CsH&?0I>$Pg{&0{fv6izVr+5YjfL`W9=%B zU)?*j0Cy}S@QJW=qUA<|8(UbD&gd+J!P)P;<1N=49&^W|8C=4Na@7(T{H?OZw$l zi!E%%-Rb4KJkys>EL@?a9sc3;B{#a8Q~_BgSrxVlj2LCz0)_rCD5z}AVb@G_-}u3quG(ypm&iI<%Azi#u*?P&^WlML&NB*CaRX-)kc8Zbu+ zdYT2pGA4)Q3^U3GJA=OpU;H@54R=IhIHz<@i6FI9fPxx~$-HnC0qU=wxslWKM+?kMu-2iMWKCy?v5>eki&UajN^4=x{HxX!Z>T}1~7v6S{PGv^8+2Vz_`I#NE> zac>C^TteZ8qf`FUwf;9dTURTg{_!9CjDt&hV;pY+POjhfONCUKdZ}0uWrM@$c>=V( zR>%TYn71Xp*Cb8jD|rSo8z(!!&=4pa=#Ol7y4&B3twyLGrnY=5FH3-i9E>jk z5agt&+uPhtpiX}gOp4CA5*BoOnuzbfVtAq4I;QMlOhbkA_bB>*pU9!h7dVbmwpIno zdnuF7H}g>*=wP*#W~-a!ZL1A%!;aL^x2d-Y*u7AC6kPg}PK1hfb#um= z*}j=e@-ySRW*(J_w2*uPZJ-Q?))uO(l0o-j5^EJx4NAA4yth;P4Ie6LBgsuCTNw(r z(wMIU`lvU7Q-AzlS2qiM9)z#&VD}(@#_y#Vd=^BTnAG98{8eTPD+CrA*iQL_i2fR_66zYn++`{_@^gV#6YVvC*4Cqu>_G z8v*-_r;pk(Bj(s%g~pQh`pe-g<%==ge63ZC`%~X}uFLo)4pY;4^PKXH>vdaDSDQnh zEL~NQs}ZQsUu|+k<2s%z`8yqo0x{#Eomy{Oe~@9A>@?PUQoo;9v;ca(Q6OftF@axR zs!eLfVUBxJzvhnc6qtT2cv0-8TXiT4L=`R6!TRwfG06@68j#QR6}@`v8%eg>kG z`l|nCicUdV_j#$s{V6 zw3esn&7tK=b+DI`$Wm0K0ey$B;^${f?iVX7sw4T!l)Td}?E?{{Xv_<+Xg7k$F=8a< zdqr&Yv$R_MY+{UcEQ5F>0VOw)9oz_csOrEWVlrvT-Th~0C83qs7k{Ei zeCG9Zi4N$^9r@Gvz_8Yr@&&NqmS%yz_};EX@iQZiNj@yp9!l(IaFeKJi|qH4sdx+o zvR`9^!K7f16*GC|NqteNw+phYC;=TN)Y?UecciBJxX}*-YS7 
z57^MlR_Zeag;%U5jNO3bL1n&becv3KaGB%Sx{qN5p|=s^_I!KSGp?73t2Lt^ja_DA z3jdAk5p{s=Ec!n2={$Cf`qPo>mD()kw`rF*SB2O4bfIxoy?a_f$UBJt?;_8Ak2|RF zDnMk>;+=2ycMzxuck`>IKxnFN_*C62!6o_$E-U_XdDgZFOyhQM({=q-RqKKHWvSTZ zJE8=GmMh=4U0e4lxH+PE|J29MTM9&35w0vt2RR#hb+aZFP&VTF0B7V+4vj&nFQwd| zJi5c3u%jOYjttiBWE}ql-q_j^VeViB)0pGumIC>#wq@*meape}DHkhT^f}KuFH1i)~hG4?d>!>Q=4c zA^&g%-c4<;5U4o4-Xj7Y9`ahcJ0znz0{?P0-39Qy$||RJCeGi@6Yg{S)2Ev28xGa# zvn&chY=E57fc&0Yp+mhLfLVAN^baU`;pG37Pq4{yP!@R7GGx7YE8Gi$zrS)XQqk{U z)&SHQ)=5?XIfwt*H)v{fjN0-t{1|Mk26HF(|Ql!&Xri z1P%7=!<&3(E&ZHrg`roK8Qnowoi0N9&lgyou~^k9v&gT%k{A)7on2KI6I}Ub7KWsUMYqL|4P*$Xxetf}(u;>0|AzXf(TuXXgm-OW!-1JzX zR-R;Il1mK`ZW@*(*{ZYz>jT22!4$Nv=?| z31|O>zQTB|ms9U2X@d2kNjNLn8OCd^%qE$S7U3XUGiy38XLCyCqd_<~1_57il0D;g zPNd)J0kVCe^41(#q*()^*Wn=BJvA-=*z@s_!vS?6RXE6YFWNT8QbtN73Rk*p{Ggj>nBt=En~M+Z{hVJ36i3lxb%yAf(a2$%cB`LEnT#>h5NV27WksKtA-FN zoOJs3eyuD&kLZ4ZYy;^Nu2%ANmXkm!p3>MZv_0yi)UJ=Q^a-aS-Btsye0oexk$vs$ zn-crkV$m*KOq1w~j^&dD{XQ5-mvBo3lZ8!W`}UKT7Wh{Qj;%<#gp;0bb)u`j?AY%o zZ}~*UEf?mZs~=AK&O++zJKgdLW~33cCEQB*&cb4%0X<}kq)Rx-lyXh0I5Y~k)OueI z4KxY25MEBvM#Y#W;gD`%GPDVYbYsq;aFyWOEFj$ka44MQje1;fApkAH)!0avffnH~ zb`ydxoYe7~rVD)3;|nKsvarIIlN@~Eq?S`W*Ip3Pg~Di&$N)ws=qY*IWCIwvTn!jU#kT4S?bK9Yn( zx+y?XN!9)w{34JvX@GDFkW^A}8wUkQ5f1651PBMRB1Je2&-yAJgMQET*q-B4_74F`Bb}4 zO&!0VnEmBq+iiNRXs_wU-N{qUr%5<&Wfj6rhcDWBjt@L6sfJ;~VObtR@mqvu5zWyi9H(*Ip5Z?LDfps&)44GRO=CDi z1NBccF9PrBB-#J2jWJJWJQis*_M&m1O*k{7a69_(Qx?5?VO&tjWG9-VPq>`y=tMOw zgS7Tnf&UoPA~Yut?(XioKK_)-*_3pN=F@qgg(5~YM_0lbO1Y_o3l+_Ag+0~(5ht1> zM7ZmO_h`E%(LRjWLD&^8Ty!rQ2l|9F^cTI2Jl0#1>_f!Pflb4gk;fsC$vQXYU~1t~ zh~^(hNvIOd(Kl&u44pir6wRlEiGxbf93cs3C|yE?yC%tY6K%TN18@#vHFep$R4CkRof~ryT2dW$EVuDGf^&)HfBs*YK5J@<63sDGI1X6JA4xlC zk0M2LNW5)6+kL_wX75GgfJEWmF*t@Hu<1qPK z^vDURk3Qm9n*M-j{Yadp8X9>2&EPnURO63Gd-r-AyESkZnbxshlIkp6()sTP5K(ca+xCh3q|QXMJxR#o?+aX`Z( z$Wu%nu8?SX(2#I3lZT=^H|9V?!i||cWWO&OxJQFYq#oG`}x3oR;Ny^${$Q~&?~07*qoM6N<$f?w7gpa1{> literal 0 HcmV?d00001 diff --git 
a/metadata-ingestion-modules/prefect-datahub/docs/overrides/partials/integrations/analytics/custom.html b/metadata-ingestion-modules/prefect-datahub/docs/overrides/partials/integrations/analytics/custom.html new file mode 100644 index 0000000000000..96a2301be822f --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/docs/overrides/partials/integrations/analytics/custom.html @@ -0,0 +1,16 @@ + + + + + diff --git a/metadata-ingestion-modules/prefect-datahub/docs/stylesheets/extra.css b/metadata-ingestion-modules/prefect-datahub/docs/stylesheets/extra.css new file mode 100644 index 0000000000000..11a020958ecd8 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/docs/stylesheets/extra.css @@ -0,0 +1,114 @@ +/* theme */ +:root > * { + /* theme */ + --md-primary-fg-color: #115AF4; + --md-primary-fg-color--light: #115AF4; + --md-primary-fg-color--dark: #115AF4; +} + +/* Table formatting */ +.md-typeset table:not([class]) td { + padding: 0.5em 1.25em; +} +.md-typeset table:not([class]) th { + padding: 0.5em 1.25em; +} + +/* convenience class to keep lines from breaking +useful for wrapping table cell text in a span +to force column width */ +.no-wrap { + white-space: nowrap; +} + +/* badge formatting */ +.badge::before { + background-color: #1860F2; + color: white; + font-size: 0.8rem; + font-weight: normal; + padding: 4px 8px; + margin-left: 0.5rem; + vertical-align: super; + text-align: center; + border-radius: 5px; +} + +.badge-api::before { + background-color: #1860F2; + color: white; + font-size: 0.8rem; + font-weight: normal; + padding: 4px 8px; + text-align: center; + border-radius: 5px; +} + +.experimental::before { + background-color: #FCD14E; + content: "Experimental"; +} + +.cloud::before { + background-color: #799AF7; + content: "Prefect Cloud"; +} + +.deprecated::before { + background-color: #FA1C2F; + content: "Deprecated"; +} + +.new::before { + background-color: #2AC769; + content: "New"; +} + +.expert::before { + background-color: 
#726576; + content: "Advanced"; +} + +/* dark mode slate theme */ +/* dark mode code overrides */ +[data-md-color-scheme="slate"] { + --md-code-bg-color: #252a33; + --md-code-fg-color: #eee; + --md-code-hl-color: #3b3d54; + --md-code-hl-name-color: #eee; +} + +/* dark mode link overrides */ +[data-md-color-scheme="slate"] .md-typeset a { + color: var(--blue); +} + +[data-md-color-scheme="slate"] .md-typeset a:hover { + font-weight: bold; +} + +/* dark mode nav overrides */ +[data-md-color-scheme="slate"] .md-nav--primary .md-nav__item--active>.md-nav__link { + color: var(--blue); + font-weight: bold; +} + +[data-md-color-scheme="slate"] .md-nav--primary .md-nav__link--active { + color: var(--blue); + font-weight: bold; +} + +/* dark mode collection catalog overrides */ +[data-md-color-scheme="slate"] .collection-item { + background-color: #3b3d54; +} + +/* dark mode recipe collection overrides */ +[data-md-color-scheme="slate"] .recipe-item { + background-color: #3b3d54; +} + +/* dark mode API doc overrides */ +[data-md-color-scheme="slate"] .prefect-table th { + background-color: #3b3d54; +} \ No newline at end of file diff --git a/metadata-ingestion-modules/prefect-datahub/mkdocs.yml b/metadata-ingestion-modules/prefect-datahub/mkdocs.yml new file mode 100644 index 0000000000000..968d6c0b655a9 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/mkdocs.yml @@ -0,0 +1,81 @@ +site_name: prefect-datahub +site_url: https://shubhamjagtap639.github.io/prefect-datahub +repo_url: https://github.com/shubhamjagtap639/prefect-datahub +edit_uri: edit/main/docs/ +theme: + name: material + custom_dir: docs/overrides + favicon: img/favicon.ico + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: light)" + accent: blue + primary: blue + scheme: default + toggle: + icon: material/weather-sunny + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + 
accent: blue + primary: blue + scheme: slate + toggle: + icon: material/weather-night + name: Switch to light mode + icon: + repo: fontawesome/brands/github + logo: + img/prefect-logo-mark-solid-white-500.png + font: + text: Inter + code: Source Code Pro + features: + - content.code.copy + - content.code.annotate +extra_css: + - stylesheets/extra.css +markdown_extensions: + - admonition + - attr_list + - codehilite + - md_in_html + - meta + - pymdownx.highlight: + use_pygments: true + - pymdownx.superfences + - pymdownx.tabbed + - pymdownx.inlinehilite + - pymdownx.snippets + +plugins: + - search + - gen-files: + scripts: + - docs/gen_home_page.py + - docs/gen_examples_catalog.py + - docs/gen_blocks_catalog.py + - mkdocstrings: + handlers: + python: + options: + show_root_heading: True + show_object_full_path: False + show_category_heading: True + show_bases: True + show_signature: False + heading_level: 1 +watch: + - prefect_datahub/ + - README.md + +nav: + - Home: index.md + - Datahub Emitter: datahub_emitter.md + - Blocks Catalog: blocks_catalog.md + - Examples Catalog: examples_catalog.md + - Concept Mapping: concept_mapping.md + + diff --git a/metadata-ingestion-modules/prefect-datahub/prefect_datahub/__init__.py b/metadata-ingestion-modules/prefect-datahub/prefect_datahub/__init__.py new file mode 100644 index 0000000000000..3e00a07d907bc --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/prefect_datahub/__init__.py @@ -0,0 +1,21 @@ +# Published at https://pypi.org/project/acryl-datahub/. 
__package_name__ = "prefect-datahub"
__version__ = "0.0.0.dev1"


def is_dev_mode() -> bool:
    """Return True when this is a development (pre-release) build.

    FIX: the original compared against the literal suffix ``"dev0"``, which
    silently returned False for the actual version ``0.0.0.dev1`` (and any
    later dev build). Check whether the final version segment is a ``devN``
    segment instead.
    """
    return __version__.rsplit(".", 1)[-1].startswith("dev")


def nice_version_name() -> str:
    """Return a human-readable version string for display purposes."""
    if is_dev_mode():
        return "unavailable (installed in develop mode)"
    return __version__


def get_provider_info() -> dict:
    """Return the provider metadata dict used for integration registration."""
    return {
        "package-name": f"{__package_name__}",
        "name": f"{__package_name__}",
        "description": "datahub emitter to emit prefect metadata",
    }


# ==== File boundary: prefect_datahub/datahub_emitter.py ====
"""Datahub Emitter classes used to emit prefect metadata to Datahub REST."""

import asyncio
import traceback
from typing import Dict, List, Optional
from uuid import UUID

from datahub.api.entities.datajob import DataFlow, DataJob
from datahub.api.entities.dataprocess.dataprocess_instance import (
    DataProcessInstance,
    InstanceRunResult,
)
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import BrowsePathsClass
from datahub.utilities.urns.data_flow_urn import DataFlowUrn
from datahub.utilities.urns.data_job_urn import DataJobUrn
from datahub.utilities.urns.dataset_urn import DatasetUrn
from datahub_provider.entities import _Entity
from prefect import get_run_logger
from prefect.blocks.core import Block
from prefect.client import cloud, orchestration
from prefect.client.schemas import FlowRun, TaskRun, Workspace
from prefect.client.schemas.objects import Flow
from prefect.context import FlowRunContext, TaskRunContext
from prefect.settings import PREFECT_API_URL
from pydantic import Field, HttpUrl, parse_obj_as

# Name of the orchestration platform, used in all URNs emitted to DataHub.
ORCHESTRATOR = "prefect"

# Flow and task 
common constants
+VERSION = "version"
+RETRIES = "retries"
+TIMEOUT_SECONDS = "timeout_seconds"
+LOG_PRINTS = "log_prints"
+ON_COMPLETION = "on_completion"
+ON_FAILURE = "on_failure"
+
+# Flow constants
+FLOW_RUN_NAME = "flow_run_name"
+TASK_RUNNER = "task_runner"
+PERSIST_RESULT = "persist_result"
+ON_CANCELLATION = "on_cancellation"
+ON_CRASHED = "on_crashed"
+
+# Task constants
+CACHE_EXPIRATION = "cache_expiration"
+TASK_RUN_NAME = "task_run_name"
+REFRESH_CACHE = "refresh_cache"
+TASK_KEY = "task_key"
+
+# Flow run and task run common constants
+ID = "id"
+CREATED = "created"
+UPDATED = "updated"
+TAGS = "tags"
+ESTIMATED_RUN_TIME = "estimated_run_time"
+START_TIME = "start_time"
+END_TIME = "end_time"
+TOTAL_RUN_TIME = "total_run_time"
+NEXT_SCHEDULED_START_TIME = "next_scheduled_start_time"
+
+# Flow run constants
+CREATED_BY = "created_by"
+AUTO_SCHEDULED = "auto_scheduled"
+
+# Task run constants
+FLOW_RUN_ID = "flow_run_id"
+RUN_COUNT = "run_count"
+UPSTREAM_DEPENDENCIES = "upstream_dependencies"
+
+# States constants
+COMPLETE = "Completed"
+FAILED = "Failed"
+CANCELLED = "Cancelled"
+
+
+class DatahubEmitter(Block):
+    """
+    Block used to emit prefect task and flow related metadata to Datahub REST
+
+    Attributes:
+        datahub_rest_url Optional(str) : Datahub GMS Rest URL. \
+            Example: http://localhost:8080.
+        env Optional(str) : The environment that all assets produced by this \
+            orchestrator belong to. For more detail and possible values refer \
+            https://datahubproject.io/docs/graphql/enums/#fabrictype.
+        platform_instance Optional(str) : The instance of the platform that all assets \
+            produced by this recipe belong to. For more detail please refer to \
+            https://datahubproject.io/docs/platform-instances/.
+ + Example: + Store value: + ```python + from prefect_datahub.datahub_emitter import DatahubEmitter + DatahubEmitter( + datahub_rest_url="http://localhost:8080", + env="PROD", + platform_instance="local_prefect" + ).save("BLOCK_NAME") + ``` + Load a stored value: + ```python + from prefect_datahub.datahub_emitter import DatahubEmitter + block = DatahubEmitter.load("BLOCK_NAME") + ``` + """ + + _block_type_name: Optional[str] = "datahub emitter" + # replace this with a relevant logo; defaults to Prefect logo + _logo_url = parse_obj_as( + HttpUrl, "https://datahubproject.io/img/datahub-logo-color-mark.svg" + ) # noqa + _documentation_url = parse_obj_as( + HttpUrl, + "https://shubhamjagtap639.github.io/prefect-datahub/datahub_emitter/" + "#prefect-datahub.datahub_emitter.DatahubEmitter", + ) # noqa + + datahub_rest_url: str = Field( + default="http://localhost:8080", + title="Datahub rest url", + description="Datahub GMS Rest URL. Example: http://localhost:8080", + ) + + env: str = Field( + default="prod", + title="Environment", + description="The environment that all assets produced by this orchestrator " + "belong to. For more detail and possible values refer " + "https://datahubproject.io/docs/graphql/enums/#fabrictype.", + ) + + platform_instance: Optional[str] = Field( + default=None, + title="Platform instance", + description="The instance of the platform that all assets produced by this " + "recipe belong to. For more detail please refer to " + "https://datahubproject.io/docs/platform-instances/.", + ) + + def __init__(self, *args, **kwargs): + """ + Initialize datahub rest emitter + """ + super().__init__(*args, **kwargs) + self.datajobs_to_emit = {} + self.emitter = DatahubRestEmitter(gms_server=self.datahub_rest_url) + self.emitter.test_connection() + + def _entities_to_urn_list(self, iolets: List[_Entity]) -> List[DatasetUrn]: + """ + Convert list of _entity to list of dataser urn + + Args: + iolets (list[_Entity]): The list of entities. 
+ + Returns: + The list of Dataset URN. + """ + return [DatasetUrn.create_from_string(let.urn) for let in iolets] + + def _get_workspace(self) -> Optional[str]: + """ + Fetch workspace name if present in configured prefect api url. + + Returns: + The workspace name. + """ + try: + asyncio.run(cloud.get_cloud_client().api_healthcheck()) + except Exception: + get_run_logger().debug(traceback.format_exc()) + return None + if "workspaces" not in PREFECT_API_URL.value(): + get_run_logger().debug( + "Cannot fetch workspace name. Please login to prefect cloud using " + "command 'prefect cloud login'." + ) + return None + current_workspace_id = PREFECT_API_URL.value().split("/")[-1] + workspaces: List[Workspace] = asyncio.run( + cloud.get_cloud_client().read_workspaces() + ) + for workspace in workspaces: + if str(workspace.workspace_id) == current_workspace_id: + return workspace.workspace_name + return None + + async def _get_flow_run_graph(self, flow_run_id: str) -> Optional[List[Dict]]: + """ + Fetch the flow run graph for provided flow run id + + Args: + flow_run_id (str): The flow run id. + + Returns: + The flow run graph in json format. + """ + try: + response = await orchestration.get_client()._client.get( + f"/flow_runs/{flow_run_id}/graph" + ) + except Exception: + get_run_logger().debug(traceback.format_exc()) + return None + return response.json() + + def _emit_browsepath(self, urn: str, workspace_name: str) -> None: + """ + Emit browsepath for provided urn. Set path as orchestrator/env/workspace_name. 
+ + Args: + urn (str): The entity URN + workspace_name (str): The prefect cloud workspace name + """ + mcp = MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=BrowsePathsClass( + paths=[f"/{ORCHESTRATOR}/{self.env}/{workspace_name}"] + ), + ) + self.emitter.emit(mcp) + + def _generate_datajob( + self, + flow_run_ctx: FlowRunContext, + task_run_ctx: Optional[TaskRunContext] = None, + task_key: Optional[str] = None, + ) -> Optional[DataJob]: + """ + Create datajob entity using task run ctx and flow run ctx. + Assign description, tags, and properties to created datajob. + + Args: + flow_run_ctx (FlowRunContext): The prefect current running flow run context. + task_run_ctx (Optional[TaskRunContext]): The prefect current running task \ + run context. + task_key (Optional[str]): The task key. + + Returns: + The datajob entity. + """ + dataflow_urn = DataFlowUrn.create_from_ids( + orchestrator=ORCHESTRATOR, + flow_id=flow_run_ctx.flow.name, + env=self.env, + platform_instance=self.platform_instance, + ) + if task_run_ctx is not None: + datajob = DataJob( + id=task_run_ctx.task.task_key, + flow_urn=dataflow_urn, + name=task_run_ctx.task.name, + ) + + datajob.description = task_run_ctx.task.description + datajob.tags = task_run_ctx.task.tags + job_property_bag: Dict[str, str] = {} + + allowed_task_keys = [ + VERSION, + CACHE_EXPIRATION, + TASK_RUN_NAME, + RETRIES, + TIMEOUT_SECONDS, + LOG_PRINTS, + REFRESH_CACHE, + TASK_KEY, + ON_COMPLETION, + ON_FAILURE, + ] + for key in allowed_task_keys: + if ( + hasattr(task_run_ctx.task, key) + and getattr(task_run_ctx.task, key) is not None + ): + job_property_bag[key] = repr(getattr(task_run_ctx.task, key)) + datajob.properties = job_property_bag + return datajob + elif task_key is not None: + datajob = DataJob( + id=task_key, flow_urn=dataflow_urn, name=task_key.split(".")[-1] + ) + return datajob + return None + + def _generate_dataflow(self, flow_run_ctx: FlowRunContext) -> Optional[DataFlow]: + """ + Create dataflow entity 
using flow run ctx. + Assign description, tags, and properties to created dataflow. + + Args: + flow_run_ctx (FlowRunContext): The prefect current running flow run context. + + Returns: + The dataflow entity. + """ + try: + flow: Flow = asyncio.run( + orchestration.get_client().read_flow( + flow_id=flow_run_ctx.flow_run.flow_id + ) + ) + except Exception: + get_run_logger().debug(traceback.format_exc()) + return None + assert flow + + dataflow = DataFlow( + orchestrator=ORCHESTRATOR, + id=flow_run_ctx.flow.name, + env=self.env, + name=flow_run_ctx.flow.name, + platform_instance=self.platform_instance, + ) + dataflow.description = flow_run_ctx.flow.description + dataflow.tags = set(flow.tags) + flow_property_bag: Dict[str, str] = {} + flow_property_bag[ID] = str(flow.id) + flow_property_bag[CREATED] = str(flow.created) + flow_property_bag[UPDATED] = str(flow.updated) + + allowed_flow_keys = [ + VERSION, + FLOW_RUN_NAME, + RETRIES, + TASK_RUNNER, + TIMEOUT_SECONDS, + PERSIST_RESULT, + LOG_PRINTS, + ON_COMPLETION, + ON_FAILURE, + ON_CANCELLATION, + ON_CRASHED, + ] + for key in allowed_flow_keys: + if ( + hasattr(flow_run_ctx.flow, key) + and getattr(flow_run_ctx.flow, key) is not None + ): + flow_property_bag[key] = repr(getattr(flow_run_ctx.flow, key)) + dataflow.properties = flow_property_bag + + return dataflow + + def _emit_tasks( + self, + flow_run_ctx: FlowRunContext, + dataflow: DataFlow, + workspace_name: Optional[str] = None, + ) -> None: + """ + Emit prefect tasks metadata to datahub rest. Add upstream dependencies if + present for each task. + + Args: + flow_run_ctx (FlowRunContext): The prefect current running flow run context + dataflow (DataFlow): The datahub dataflow entity. + workspace_name Optional(str): The prefect cloud workpace name. 
+ """ + graph_json = asyncio.run( + self._get_flow_run_graph(str(flow_run_ctx.flow_run.id)) + ) + if graph_json is None: + return + + task_run_key_map: Dict[str, str] = {} + for prefect_future in flow_run_ctx.task_run_futures: + if prefect_future.task_run is not None: + task_run_key_map[ + str(prefect_future.task_run.id) + ] = prefect_future.task_run.task_key + + get_run_logger().info("Emitting tasks to datahub...") + + for node in graph_json: + datajob_urn = DataJobUrn.create_from_ids( + data_flow_urn=str(dataflow.urn), + job_id=task_run_key_map[node[ID]], + ) + datajob: Optional[DataJob] = None + if str(datajob_urn) in self.datajobs_to_emit: + datajob = self.datajobs_to_emit[str(datajob_urn)] + else: + datajob = self._generate_datajob( + flow_run_ctx=flow_run_ctx, task_key=task_run_key_map[node[ID]] + ) + if datajob is not None: + for each in node[UPSTREAM_DEPENDENCIES]: + upstream_task_urn = DataJobUrn.create_from_ids( + data_flow_urn=str(dataflow.urn), + job_id=task_run_key_map[each[ID]], + ) + datajob.upstream_urns.extend([upstream_task_urn]) + datajob.emit(self.emitter) + + if workspace_name is not None: + self._emit_browsepath(str(datajob.urn), workspace_name) + + self._emit_task_run( + datajob=datajob, + flow_run_name=flow_run_ctx.flow_run.name, + task_run_id=UUID(node[ID]), + ) + + def _emit_flow_run(self, dataflow: DataFlow, flow_run_id: UUID) -> None: + """ + Emit prefect flow run to datahub rest. Prefect flow run get mapped with datahub + data process instance entity which get's generate from provided dataflow entity. + Assign flow run properties to data process instance properties. + + Args: + dataflow (DataFlow): The datahub dataflow entity used to create \ + data process instance. + flow_run_id (UUID): The prefect current running flow run id. 
+ """ + try: + flow_run: FlowRun = asyncio.run( + orchestration.get_client().read_flow_run(flow_run_id=flow_run_id) + ) + except Exception: + get_run_logger().debug(traceback.format_exc()) + return + assert flow_run + + if self.platform_instance is not None: + dpi_id = f"{self.platform_instance}.{flow_run.name}" + else: + dpi_id = flow_run.name + dpi = DataProcessInstance.from_dataflow(dataflow=dataflow, id=dpi_id) + + dpi_property_bag: Dict[str, str] = {} + allowed_flow_run_keys = [ + ID, + CREATED, + UPDATED, + CREATED_BY, + AUTO_SCHEDULED, + ESTIMATED_RUN_TIME, + START_TIME, + TOTAL_RUN_TIME, + NEXT_SCHEDULED_START_TIME, + TAGS, + RUN_COUNT, + ] + for key in allowed_flow_run_keys: + if hasattr(flow_run, key) and getattr(flow_run, key) is not None: + dpi_property_bag[key] = str(getattr(flow_run, key)) + dpi.properties.update(dpi_property_bag) + + if flow_run.start_time is not None: + dpi.emit_process_start( + emitter=self.emitter, + start_timestamp_millis=int(flow_run.start_time.timestamp() * 1000), + ) + + def _emit_task_run( + self, datajob: DataJob, flow_run_name: str, task_run_id: UUID + ) -> None: + """ + Emit prefect task run to datahub rest. Prefect task run get mapped with datahub + data process instance entity which get's generate from provided datajob entity. + Assign task run properties to data process instance properties. + + Args: + datajob (DataJob): The datahub datajob entity used to create \ + data process instance. + flow_run_name (str): The prefect current running flow run name. + task_run_id (str): The prefect task run id. 
+ """ + try: + task_run: TaskRun = asyncio.run( + orchestration.get_client().read_task_run(task_run_id=task_run_id) + ) + except Exception: + get_run_logger().debug(traceback.format_exc()) + return + assert task_run + + if self.platform_instance is not None: + dpi_id = f"{self.platform_instance}.{flow_run_name}.{task_run.name}" + else: + dpi_id = f"{flow_run_name}.{task_run.name}" + dpi = DataProcessInstance.from_datajob( + datajob=datajob, + id=dpi_id, + clone_inlets=True, + clone_outlets=True, + ) + + dpi_property_bag: Dict[str, str] = {} + allowed_task_run_keys = [ + ID, + FLOW_RUN_ID, + CREATED, + UPDATED, + ESTIMATED_RUN_TIME, + START_TIME, + END_TIME, + TOTAL_RUN_TIME, + NEXT_SCHEDULED_START_TIME, + TAGS, + RUN_COUNT, + ] + for key in allowed_task_run_keys: + if hasattr(task_run, key) and getattr(task_run, key) is not None: + dpi_property_bag[key] = str(getattr(task_run, key)) + dpi.properties.update(dpi_property_bag) + + state_result_map: Dict[str, InstanceRunResult] = { + COMPLETE: InstanceRunResult.SUCCESS, + FAILED: InstanceRunResult.FAILURE, + CANCELLED: InstanceRunResult.SKIPPED, + } + + if task_run.state_name not in state_result_map: + raise Exception( + f"State should be either complete, failed or cancelled and it was " + f"{task_run.state_name}" + ) + + result = state_result_map[task_run.state_name] + + if task_run.start_time is not None: + dpi.emit_process_start( + emitter=self.emitter, + start_timestamp_millis=int(task_run.start_time.timestamp() * 1000), + emit_template=False, + ) + + if task_run.end_time is not None: + dpi.emit_process_end( + emitter=self.emitter, + end_timestamp_millis=int(task_run.end_time.timestamp() * 1000), + result=result, + result_type=ORCHESTRATOR, + ) + + def add_task( + self, + inputs: Optional[List[_Entity]] = None, + outputs: Optional[List[_Entity]] = None, + ) -> None: + """ + Store prefect current running task metadata temporarily which later get emit + to datahub rest only if user calls emit_flow. 
Prefect task gets mapped with + datahub datajob entity. Assign provided inputs and outputs as datajob inlets + and outlets respectively. + + Args: + inputs (Optional[list]): The list of task inputs. + outputs (Optional[list]): The list of task outputs. + + Example: + Emit the task metadata as show below: + ```python + from datahub_provider.entities import Dataset + from prefect import flow, task + + from prefect_datahub.datahub_emitter import DatahubEmitter + + datahub_emitter = DatahubEmitter.load("MY_BLOCK_NAME") + + @task(name="Transform", description="Transform the data") + def transform(data): + data = data.split(" ") + datahub_emitter.add_task( + inputs=[Dataset("snowflake", "mydb.schema.tableA")], + outputs=[Dataset("snowflake", "mydb.schema.tableC")], + ) + return data + + @flow(name="ETL flow", description="Extract transform load flow") + def etl(): + data = transform("This is data") + datahub_emitter.emit_flow() + ``` + """ + flow_run_ctx = FlowRunContext.get() + task_run_ctx = TaskRunContext.get() + assert flow_run_ctx + assert task_run_ctx + + datajob = self._generate_datajob( + flow_run_ctx=flow_run_ctx, task_run_ctx=task_run_ctx + ) + if datajob is not None: + if inputs is not None: + datajob.inlets.extend(self._entities_to_urn_list(inputs)) + if outputs is not None: + datajob.outlets.extend(self._entities_to_urn_list(outputs)) + self.datajobs_to_emit[str(datajob.urn)] = datajob + + def emit_flow(self) -> None: + """ + Emit prefect current running flow metadata to datahub rest. Prefect flow gets + mapped with datahub dataflow entity. If the user hasn't called add_task in + the task function still emit_flow will emit a task but without task name, + description,tags and properties. 
+ + + Example: + Emit the flow metadata as show below: + ```python + from prefect import flow, task + + from prefect_datahub.datahub_emitter import DatahubEmitter + + datahub_emitter = DatahubEmitter.load("MY_BLOCK_NAME") + + @flow(name="ETL flow", description="Extract transform load flow") + def etl(): + data = extract() + data = transform(data) + load(data) + datahub_emitter.emit_flow() + ``` + """ + flow_run_ctx = FlowRunContext.get() + assert flow_run_ctx + + workspace_name = self._get_workspace() + + # Emit flow and flow run + get_run_logger().info("Emitting flow to datahub...") + dataflow = self._generate_dataflow(flow_run_ctx=flow_run_ctx) + + if dataflow is not None: + dataflow.emit(self.emitter) + + if workspace_name is not None: + self._emit_browsepath(str(dataflow.urn), workspace_name) + + self._emit_flow_run(dataflow, flow_run_ctx.flow_run.id) + + self._emit_tasks(flow_run_ctx, dataflow, workspace_name) diff --git a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt new file mode 100644 index 0000000000000..be4d2406f2975 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt @@ -0,0 +1,16 @@ +pytest +black +flake8 +mypy +mkdocs +mkdocs-material +mkdocstrings[python] +isort +pre-commit +pytest-asyncio +mock; python_version < '3.8' +mkdocs-gen-files +interrogate +coverage +pillow +types-requests \ No newline at end of file diff --git a/metadata-ingestion-modules/prefect-datahub/requirements.txt b/metadata-ingestion-modules/prefect-datahub/requirements.txt new file mode 100644 index 0000000000000..db5c355c97f8a --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/requirements.txt @@ -0,0 +1,2 @@ +prefect>=2.0.0 +acryl-datahub[datahub-rest] \ No newline at end of file diff --git a/metadata-ingestion-modules/prefect-datahub/scripts/release.sh b/metadata-ingestion-modules/prefect-datahub/scripts/release.sh new file mode 100755 index 
0000000000000..17faff8c338e3 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/scripts/release.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -euxo pipefail + +if [[ ! ${RELEASE_SKIP_TEST:-} ]]; then + ../../gradlew build # also runs tests +elif [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then + ../../gradlew install +fi + +MODULE=prefect_datahub + +# Check packaging constraint. +python -c 'import setuptools; where="./prefect_datahub"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' +if [[ ${RELEASE_VERSION:-} ]]; then + # Replace version with RELEASE_VERSION env variable + sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" ${MODULE}/__init__.py +else + vim ${MODULE}/__init__.py +fi + +rm -rf build dist || true +python -m build +if [[ ! ${RELEASE_SKIP_UPLOAD:-} ]]; then + python -m twine upload 'dist/*' +fi +git restore ${MODULE}/__init__.py diff --git a/metadata-ingestion-modules/prefect-datahub/setup.cfg b/metadata-ingestion-modules/prefect-datahub/setup.cfg new file mode 100644 index 0000000000000..17d7e84c47415 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/setup.cfg @@ -0,0 +1,39 @@ +[flake8] +exclude = .git,__pycache__,build,dist +per-file-ignores = + setup.py:E501 +# Match black line-length +max-line-length = 88 +extend-ignore = + E203, + +[isort] +skip = __init__.py +profile = black +skip_gitignore = True +multi_line_output = 3 + +[versioneer] +VCS = git +style = pep440 +versionfile_source = prefect_datahub/_version.py +versionfile_build = prefect_datahub/_version.py +tag_prefix = v +parentdir_prefix = + +[tool:interrogate] +ignore-init-module = True +ignore_init_method = True +exclude = prefect_datahub/_version.py, tests, setup.py, versioneer.py, docs, site +fail-under = 95 +omit-covered-files = True + +[coverage:run] +omit = tests/*, prefect_datahub/_version.py + +[coverage:report] +fail_under = 80 +show_missing = True + 
+[tool:pytest] +asyncio_mode = auto diff --git a/metadata-ingestion-modules/prefect-datahub/setup.py b/metadata-ingestion-modules/prefect-datahub/setup.py new file mode 100644 index 0000000000000..ebe484ce4c7a5 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/setup.py @@ -0,0 +1,48 @@ +from setuptools import find_packages, setup + +package_metadata: dict = {} +with open("./prefect_datahub/__init__.py") as fp: + exec(fp.read(), package_metadata) + +with open("requirements.txt") as install_requires_file: + install_requires = install_requires_file.read().strip().split("\n") + +with open("requirements-dev.txt") as dev_requires_file: + dev_requires = dev_requires_file.read().strip().split("\n") + +with open("README.md") as readme_file: + readme = readme_file.read() + +setup( + name=package_metadata["__package_name__"], + version=package_metadata["__version__"], + description="Metadata emitter for datahub", + license="Apache License 2.0", + author="Acryl Data", + author_email="shubham.jagtap@gslab.com", + keywords="prefect", + url="https://github.com/PrefectHQ/prefect-datahub", + long_description=readme, + long_description_content_type="text/markdown", + packages=find_packages(exclude=("tests", "docs")), + python_requires=">=3.7", + install_requires=install_requires, + extras_require={"dev": dev_requires}, + entry_points={ + "prefect.collections": [ + "prefect_datahub = prefect_datahub", + ] + }, + classifiers=[ + "Natural Language :: English", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Software Development :: Libraries", + ], +) diff --git a/metadata-ingestion-modules/prefect-datahub/tests/conftest.py 
b/metadata-ingestion-modules/prefect-datahub/tests/conftest.py new file mode 100644 index 0000000000000..ee0fabc712966 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/tests/conftest.py @@ -0,0 +1,489 @@ +import asyncio +import json +import logging +from typing import Dict, List +from unittest.mock import MagicMock, patch +from uuid import UUID + +import pytest +from prefect.client.schemas import FlowRun, TaskRun, Workspace +from prefect.futures import PrefectFuture +from prefect.server.schemas.core import Flow +from requests.models import Response + +mock_transform_task_json: Dict = { + "name": "transform", + "description": "Transform the actual data", + "task_key": "__main__.transform", + "tags": ["etl flow task"], +} +mock_extract_task_run_json: Dict = { + "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", + "created": "2023-06-06T05:51:54.822707+00:00", + "updated": "2023-06-06T05:51:55.126000+00:00", + "name": "Extract-0", + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_key": "__main__.extract", + "dynamic_key": "0", + "cache_key": None, + "cache_expiration": None, + "task_version": None, + "empirical_policy": { + "max_retries": 0, + "retry_delay_seconds": 0.0, + "retries": 0, + "retry_delay": 0, + "retry_jitter_factor": None, + }, + "tags": [], + "state_id": "e280decd-2cc8-4428-a70f-149bcaf95b3c", + "task_inputs": {}, + "state_type": "COMPLETED", + "state_name": "Completed", + "run_count": 1, + "flow_run_run_count": 1, + "expected_start_time": "2023-06-06T05:51:54.822183+00:00", + "next_scheduled_start_time": None, + "start_time": "2023-06-06T05:51:55.016264+00:00", + "end_time": "2023-06-06T05:51:55.096534+00:00", + "total_run_time": 0.08027, + "estimated_run_time": 0.08027, + "estimated_start_time_delta": 0.194081, + "state": { + "id": "e280decd-2cc8-4428-a70f-149bcaf95b3c", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.096534+00:00", + "message": None, + "data": {"type": "unpersisted"}, + 
"state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": False, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, +} +mock_transform_task_run_json: Dict = { + "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", + "created": "2023-06-06T05:51:55.160372+00:00", + "updated": "2023-06-06T05:51:55.358000+00:00", + "name": "transform-0", + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_key": "__main__.transform", + "dynamic_key": "0", + "cache_key": None, + "cache_expiration": None, + "task_version": None, + "empirical_policy": { + "max_retries": 0, + "retry_delay_seconds": 0.0, + "retries": 0, + "retry_delay": 0, + "retry_jitter_factor": None, + }, + "tags": [], + "state_id": "971ad82e-6e5f-4691-abab-c900358e96c2", + "task_inputs": { + "actual_data": [ + {"input_type": "task_run", "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b"} + ] + }, + "state_type": "COMPLETED", + "state_name": "Completed", + "run_count": 1, + "flow_run_run_count": 1, + "expected_start_time": "2023-06-06T05:51:55.159416+00:00", + "next_scheduled_start_time": None, + "start_time": "2023-06-06T05:51:55.243159+00:00", + "end_time": "2023-06-06T05:51:55.332950+00:00", + "total_run_time": 0.089791, + "estimated_run_time": 0.089791, + "estimated_start_time_delta": 0.083743, + "state": { + "id": "971ad82e-6e5f-4691-abab-c900358e96c2", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.332950+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": 
False, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, +} +mock_load_task_run_json: Dict = { + "id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", + "created": "2023-06-06T05:51:55.389823+00:00", + "updated": "2023-06-06T05:51:55.566000+00:00", + "name": "Load_task-0", + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_key": "__main__.load", + "dynamic_key": "0", + "cache_key": None, + "cache_expiration": None, + "task_version": None, + "empirical_policy": { + "max_retries": 0, + "retry_delay_seconds": 0.0, + "retries": 0, + "retry_delay": 0, + "retry_jitter_factor": None, + }, + "tags": [], + "state_id": "0cad13c8-84e4-4bcf-8616-c5904e10dcb4", + "task_inputs": { + "data": [ + {"input_type": "task_run", "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7"} + ] + }, + "state_type": "COMPLETED", + "state_name": "Completed", + "run_count": 1, + "flow_run_run_count": 1, + "expected_start_time": "2023-06-06T05:51:55.389075+00:00", + "next_scheduled_start_time": None, + "start_time": "2023-06-06T05:51:55.461812+00:00", + "end_time": "2023-06-06T05:51:55.535954+00:00", + "total_run_time": 0.074142, + "estimated_run_time": 0.074142, + "estimated_start_time_delta": 0.072737, + "state": { + "id": "0cad13c8-84e4-4bcf-8616-c5904e10dcb4", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.535954+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": True, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, +} +mock_flow_json: Dict = { + "id": "cc65498f-d950-4114-8cc1-7af9e8fdf91b", + "created": "2023-06-02T12:31:10.988697+00:00", + "updated": 
"2023-06-02T12:31:10.988710+00:00", + "name": "etl", + "description": "Extract transform load flow", + "tags": [], +} +mock_flow_run_json: Dict = { + "id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "created": "2023-06-06T05:51:54.544266+00:00", + "updated": "2023-06-06T05:51:55.622000+00:00", + "name": "olivine-beagle", + "flow_id": "cc65498f-d950-4114-8cc1-7af9e8fdf91b", + "state_id": "ca2db325-d98f-40e7-862e-449cd0cc9a6e", + "deployment_id": None, + "work_queue_name": None, + "flow_version": "3ba54dfa31a7c9af4161aa4cd020a527", + "parameters": {}, + "idempotency_key": None, + "context": {}, + "empirical_policy": { + "max_retries": 0, + "retry_delay_seconds": 0.0, + "retries": 0, + "retry_delay": 0, + "pause_keys": [], + "resuming": False, + }, + "tags": [], + "parent_task_run_id": None, + "state_type": "COMPLETED", + "state_name": "Completed", + "run_count": 1, + "expected_start_time": "2023-06-06T05:51:54.543357+00:00", + "next_scheduled_start_time": None, + "start_time": "2023-06-06T05:51:54.750523+00:00", + "end_time": "2023-06-06T05:51:55.596446+00:00", + "total_run_time": 0.845923, + "estimated_run_time": 0.845923, + "estimated_start_time_delta": 0.207166, + "auto_scheduled": False, + "infrastructure_document_id": None, + "infrastructure_pid": None, + "created_by": None, + "work_pool_name": None, + "state": { + "id": "ca2db325-d98f-40e7-862e-449cd0cc9a6e", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.596446+00:00", + "message": "All states completed.", + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": None, + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": False, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, +} +mock_graph_json: List[Dict] = [ + { + "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", + "name": 
"Extract-0", + "upstream_dependencies": [], + "state": { + "id": "e280decd-2cc8-4428-a70f-149bcaf95b3c", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.096534+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": False, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, + "expected_start_time": "2023-06-06T05:51:54.822183+00:00", + "start_time": "2023-06-06T05:51:55.016264+00:00", + "end_time": "2023-06-06T05:51:55.096534+00:00", + "total_run_time": 0.08027, + "estimated_run_time": 0.08027, + "untrackable_result": False, + }, + { + "id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", + "name": "Load_task-0", + "upstream_dependencies": [ + {"input_type": "task_run", "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7"} + ], + "state": { + "id": "0cad13c8-84e4-4bcf-8616-c5904e10dcb4", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.535954+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": True, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, + "expected_start_time": "2023-06-06T05:51:55.389075+00:00", + "start_time": "2023-06-06T05:51:55.461812+00:00", + "end_time": "2023-06-06T05:51:55.535954+00:00", + "total_run_time": 0.074142, + "estimated_run_time": 0.074142, + "untrackable_result": True, + }, + { + "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", + "name": 
"transform-0", + "upstream_dependencies": [ + {"input_type": "task_run", "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b"} + ], + "state": { + "id": "971ad82e-6e5f-4691-abab-c900358e96c2", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.332950+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": False, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, + "expected_start_time": "2023-06-06T05:51:55.159416+00:00", + "start_time": "2023-06-06T05:51:55.243159+00:00", + "end_time": "2023-06-06T05:51:55.332950+00:00", + "total_run_time": 0.089791, + "estimated_run_time": 0.089791, + "untrackable_result": False, + }, +] +mock_workspace_json: Dict = { + "account_id": "33e98cfe-ad06-4ceb-a500-c11148499f75", + "account_name": "shubhamjagtapgslabcom", + "account_handle": "shubhamjagtapgslabcom", + "workspace_id": "157eb822-1b3b-4338-ae80-98edd5d00cb9", + "workspace_name": "datahub", + "workspace_description": "", + "workspace_handle": "datahub", +} + + +async def mock_task_run_future(): + extract_prefect_future = PrefectFuture( + name=mock_extract_task_run_json["name"], + key=UUID("4552629a-ac04-4590-b286-27642292739f"), + task_runner=None, + ) + extract_prefect_future.task_run = TaskRun.parse_obj(mock_extract_task_run_json) + transform_prefect_future = PrefectFuture( + name=mock_transform_task_run_json["name"], + key=UUID("40fff3e5-5ef4-4b8b-9cc8-786f91bcc656"), + task_runner=None, + ) + transform_prefect_future.task_run = TaskRun.parse_obj(mock_transform_task_run_json) + load_prefect_future = PrefectFuture( + name=mock_load_task_run_json["name"], + key=UUID("7565f596-9eb0-4330-ba34-963e7839883e"), + task_runner=None, + 
) + load_prefect_future.task_run = TaskRun.parse_obj(mock_load_task_run_json) + return [extract_prefect_future, transform_prefect_future, load_prefect_future] + + +@pytest.fixture(scope="module") +def mock_run_logger(): + with patch( + "prefect_datahub.datahub_emitter.get_run_logger", + return_value=logging.getLogger(), + ) as mock_logger: + yield mock_logger + + +@pytest.fixture(scope="module") +def mock_run_context(mock_run_logger): + task_run_ctx = MagicMock() + task_run_ctx.task.task_key = mock_transform_task_json["task_key"] + task_run_ctx.task.name = mock_transform_task_json["name"] + task_run_ctx.task.description = mock_transform_task_json["description"] + task_run_ctx.task.tags = mock_transform_task_json["tags"] + + flow_run_ctx = MagicMock() + flow_run_ctx.flow.name = mock_flow_json["name"] + flow_run_ctx.flow.description = mock_flow_json["description"] + flow_run_obj = FlowRun.parse_obj(mock_flow_run_json) + flow_run_ctx.flow_run.id = flow_run_obj.id + flow_run_ctx.flow_run.name = flow_run_obj.name + flow_run_ctx.flow_run.flow_id = flow_run_obj.flow_id + flow_run_ctx.flow_run.start_time = flow_run_obj.start_time + flow_run_ctx.task_run_futures = asyncio.run(mock_task_run_future()) + + with patch( + "prefect_datahub.datahub_emitter.TaskRunContext" + ) as mock_task_run_ctx, patch( + "prefect_datahub.datahub_emitter.FlowRunContext" + ) as mock_flow_run_ctx: + mock_task_run_ctx.get.return_value = task_run_ctx + mock_flow_run_ctx.get.return_value = flow_run_ctx + yield (task_run_ctx, flow_run_ctx) + + +async def mock_task_run(*args, **kwargs): + task_run_id = str(kwargs["task_run_id"]) + if task_run_id == "fa14a52b-d271-4c41-99cb-6b42ca7c070b": + return TaskRun.parse_obj(mock_extract_task_run_json) + elif task_run_id == "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7": + return TaskRun.parse_obj(mock_transform_task_run_json) + elif task_run_id == "f19f83ea-316f-4781-8cbe-1d5d8719afc3": + return TaskRun.parse_obj(mock_load_task_run_json) + return None + + +async def 
mock_flow(*args, **kwargs): + return Flow.parse_obj(mock_flow_json) + + +async def mock_flow_run(*args, **kwargs): + return FlowRun.parse_obj(mock_flow_run_json) + + +async def mock_flow_run_graph(*args, **kwargs): + response = Response() + response.status_code = 200 + response._content = json.dumps(mock_graph_json, separators=(",", ":")).encode( + "utf-8" + ) + return response + + +async def mock_api_healthcheck(*args, **kwargs): + return None + + +async def mock_read_workspaces(*args, **kwargs): + return [Workspace.parse_obj(mock_workspace_json)] + + +@pytest.fixture(scope="module") +def mock_prefect_client(): + prefect_client_mock = MagicMock() + prefect_client_mock.read_flow.side_effect = mock_flow + prefect_client_mock.read_flow_run.side_effect = mock_flow_run + prefect_client_mock.read_task_run.side_effect = mock_task_run + prefect_client_mock._client.get.side_effect = mock_flow_run_graph + with patch("prefect_datahub.datahub_emitter.orchestration") as mock_client: + mock_client.get_client.return_value = prefect_client_mock + yield prefect_client_mock + + +@pytest.fixture(scope="module") +def mock_prefect_cloud_client(): + prefect_cloud_client_mock = MagicMock() + prefect_cloud_client_mock.api_healthcheck.side_effect = mock_api_healthcheck + prefect_cloud_client_mock.read_workspaces.side_effect = mock_read_workspaces + with patch("prefect_datahub.datahub_emitter.cloud") as mock_client, patch( + "prefect_datahub.datahub_emitter.PREFECT_API_URL.value", + return_value="https://api.prefect.cloud/api/accounts/33e98cfe-ad06-4ceb-" + "a500-c11148499f75/workspaces/157eb822-1b3b-4338-ae80-98edd5d00cb9", + ): + mock_client.get_cloud_client.return_value = prefect_cloud_client_mock + yield prefect_cloud_client_mock diff --git a/metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py b/metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py new file mode 100644 index 0000000000000..496c128309786 --- /dev/null +++ 
b/metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py @@ -0,0 +1,22 @@ +import pytest +from prefect.blocks.core import Block +from prefect.testing.standard_test_suites import BlockStandardTestSuite +from prefect.utilities.dispatch import get_registry_for_type +from prefect.utilities.importtools import to_qualified_name + + +def find_module_blocks(): + blocks = get_registry_for_type(Block) + module_blocks = [ + block + for block in blocks.values() + if to_qualified_name(block).startswith("prefect_datahub") + ] + return module_blocks + + +@pytest.mark.parametrize("block", find_module_blocks()) +class TestAllBlocksAdhereToStandards(BlockStandardTestSuite): + @pytest.fixture + def block(self, block): + return block diff --git a/metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py b/metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py new file mode 100644 index 0000000000000..e294374a149e1 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py @@ -0,0 +1,291 @@ +import asyncio +from unittest.mock import Mock, patch + +from datahub.api.entities.datajob import DataJob +from datahub.utilities.urns.dataset_urn import DatasetUrn +from datahub_provider.entities import Dataset + +from prefect_datahub.datahub_emitter import DatahubEmitter + + +@patch("prefect_datahub.datahub_emitter.DatahubRestEmitter", autospec=True) +def test_entities_to_urn_list(mock_emit): + dataset_urn_list = DatahubEmitter()._entities_to_urn_list( + [Dataset("snowflake", "mydb.schema.tableA")] + ) + for dataset_urn in dataset_urn_list: + assert isinstance(dataset_urn, DatasetUrn) + + +@patch("prefect_datahub.datahub_emitter.DatahubRestEmitter", autospec=True) +def test_get_flow_run_graph(mock_emit, mock_prefect_client): + graph_json = asyncio.run( + DatahubEmitter()._get_flow_run_graph("c3b947e5-3fa1-4b46-a2e2-58d50c938f2e") + ) + assert isinstance(graph_json, list) + + 
+@patch("prefect_datahub.datahub_emitter.DatahubRestEmitter", autospec=True) +def test__get_workspace(mock_emit, mock_prefect_cloud_client): + workspace_name = DatahubEmitter()._get_workspace() + assert workspace_name == "datahub" + + +@patch("prefect_datahub.datahub_emitter.DatahubRestEmitter", autospec=True) +def test_add_task(mock_emit, mock_run_context): + mock_emitter = Mock() + mock_emit.return_value = mock_emitter + + datahub_emitter = DatahubEmitter() + inputs = [Dataset("snowflake", "mydb.schema.tableA")] + outputs = [Dataset("snowflake", "mydb.schema.tableC")] + datahub_emitter.add_task( + inputs=inputs, + outputs=outputs, + ) + + task_run_ctx = mock_run_context[0] + flow_run_ctx = mock_run_context[1] + + expected_datajob_urn = ( + f"urn:li:dataJob:(urn:li:dataFlow:" + f"(prefect,{flow_run_ctx.flow.name},prod),{task_run_ctx.task.task_key})" + ) + + assert expected_datajob_urn in datahub_emitter.datajobs_to_emit.keys() + actual_datajob = datahub_emitter.datajobs_to_emit[expected_datajob_urn] + assert isinstance(actual_datajob, DataJob) + assert str(actual_datajob.flow_urn) == "urn:li:dataFlow:(prefect,etl,prod)" + assert actual_datajob.name == task_run_ctx.task.name + assert actual_datajob.description == task_run_ctx.task.description + assert actual_datajob.tags == task_run_ctx.task.tags + assert ( + str(actual_datajob.inlets[0]) + == "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)" + ) + assert ( + str(actual_datajob.outlets[0]) + == "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ) + assert mock_emit.emit.call_count == 0 + + +@patch("prefect_datahub.datahub_emitter.DatahubRestEmitter", autospec=True) +def test_emit_flow( + mock_emit, mock_run_context, mock_prefect_client, mock_prefect_cloud_client +): + mock_emitter = Mock() + mock_emit.return_value = mock_emitter + + platform_instance = "datahub_workspace" + + datahub_emitter = DatahubEmitter(platform_instance=platform_instance) + 
datahub_emitter.add_task() + datahub_emitter.emit_flow() + + task_run_ctx = mock_run_context[0] + flow_run_ctx = mock_run_context[1] + + expected_dataflow_urn = ( + f"urn:li:dataFlow:(prefect,{platform_instance}.{flow_run_ctx.flow.name},prod)" + ) + + assert mock_emitter.method_calls[1][1][0].aspectName == "dataFlowInfo" + assert mock_emitter.method_calls[1][1][0].entityUrn == expected_dataflow_urn + assert mock_emitter.method_calls[2][1][0].aspectName == "ownership" + assert mock_emitter.method_calls[2][1][0].entityUrn == expected_dataflow_urn + assert mock_emitter.method_calls[3][1][0].aspectName == "globalTags" + assert mock_emitter.method_calls[3][1][0].entityUrn == expected_dataflow_urn + assert mock_emitter.method_calls[4][1][0].aspectName == "browsePaths" + assert mock_emitter.method_calls[4][1][0].entityUrn == expected_dataflow_urn + assert ( + mock_emitter.method_calls[8][1][0].aspectName == "dataProcessInstanceProperties" + ) + assert ( + mock_emitter.method_calls[8][1][0].entityUrn + == "urn:li:dataProcessInstance:a95d24db6abd98384fc1d4c8540098a4" + ) + assert ( + mock_emitter.method_calls[9][1][0].aspectName + == "dataProcessInstanceRelationships" + ) + assert ( + mock_emitter.method_calls[9][1][0].entityUrn + == "urn:li:dataProcessInstance:a95d24db6abd98384fc1d4c8540098a4" + ) + assert ( + mock_emitter.method_calls[10][1][0].aspectName == "dataProcessInstanceRunEvent" + ) + assert ( + mock_emitter.method_calls[10][1][0].entityUrn + == "urn:li:dataProcessInstance:a95d24db6abd98384fc1d4c8540098a4" + ) + assert mock_emitter.method_calls[11][1][0].aspectName == "dataJobInfo" + assert ( + mock_emitter.method_calls[11][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.extract)" + ) + assert mock_emitter.method_calls[12][1][0].aspectName == "dataJobInputOutput" + assert ( + mock_emitter.method_calls[12][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.extract)" + ) + assert 
mock_emitter.method_calls[13][1][0].aspectName == "ownership" + assert ( + mock_emitter.method_calls[13][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.extract)" + ) + assert mock_emitter.method_calls[14][1][0].aspectName == "globalTags" + assert ( + mock_emitter.method_calls[14][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.extract)" + ) + assert mock_emitter.method_calls[15][1][0].aspectName == "browsePaths" + assert ( + mock_emitter.method_calls[15][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.extract)" + ) + assert ( + mock_emitter.method_calls[16][1][0].aspectName + == "dataProcessInstanceProperties" + ) + assert ( + mock_emitter.method_calls[16][1][0].entityUrn + == "urn:li:dataProcessInstance:bf5eab177af0097bbff6a41694f39af9" + ) + assert ( + mock_emitter.method_calls[17][1][0].aspectName + == "dataProcessInstanceRelationships" + ) + assert ( + mock_emitter.method_calls[17][1][0].entityUrn + == "urn:li:dataProcessInstance:bf5eab177af0097bbff6a41694f39af9" + ) + assert ( + mock_emitter.method_calls[18][1][0].aspectName == "dataProcessInstanceRunEvent" + ) + assert ( + mock_emitter.method_calls[18][1][0].entityUrn + == "urn:li:dataProcessInstance:bf5eab177af0097bbff6a41694f39af9" + ) + assert ( + mock_emitter.method_calls[19][1][0].aspectName == "dataProcessInstanceRunEvent" + ) + assert ( + mock_emitter.method_calls[19][1][0].entityUrn + == "urn:li:dataProcessInstance:bf5eab177af0097bbff6a41694f39af9" + ) + assert mock_emitter.method_calls[20][1][0].aspectName == "dataJobInfo" + assert ( + mock_emitter.method_calls[20][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.load)" + ) + assert mock_emitter.method_calls[21][1][0].aspectName == "dataJobInputOutput" + assert ( + mock_emitter.method_calls[21][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.load)" + ) + assert mock_emitter.method_calls[22][1][0].aspectName == "ownership" + assert ( 
+ mock_emitter.method_calls[22][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.load)" + ) + assert mock_emitter.method_calls[23][1][0].aspectName == "globalTags" + assert ( + mock_emitter.method_calls[23][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.load)" + ) + assert mock_emitter.method_calls[24][1][0].aspectName == "browsePaths" + assert ( + mock_emitter.method_calls[24][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.load)" + ) + assert ( + mock_emitter.method_calls[25][1][0].aspectName + == "dataProcessInstanceProperties" + ) + assert ( + mock_emitter.method_calls[25][1][0].entityUrn + == "urn:li:dataProcessInstance:095673536b61e6f25c7691af0d2cc317" + ) + assert ( + mock_emitter.method_calls[26][1][0].aspectName + == "dataProcessInstanceRelationships" + ) + assert ( + mock_emitter.method_calls[26][1][0].entityUrn + == "urn:li:dataProcessInstance:095673536b61e6f25c7691af0d2cc317" + ) + assert ( + mock_emitter.method_calls[27][1][0].aspectName == "dataProcessInstanceRunEvent" + ) + assert ( + mock_emitter.method_calls[27][1][0].entityUrn + == "urn:li:dataProcessInstance:095673536b61e6f25c7691af0d2cc317" + ) + assert ( + mock_emitter.method_calls[28][1][0].aspectName == "dataProcessInstanceRunEvent" + ) + assert ( + mock_emitter.method_calls[28][1][0].entityUrn + == "urn:li:dataProcessInstance:095673536b61e6f25c7691af0d2cc317" + ) + assert mock_emitter.method_calls[29][1][0].aspectName == "dataJobInfo" + assert ( + mock_emitter.method_calls[29][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.transform)" + ) + assert mock_emitter.method_calls[30][1][0].aspectName == "dataJobInputOutput" + assert ( + mock_emitter.method_calls[30][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.transform)" + ) + assert mock_emitter.method_calls[31][1][0].aspectName == "ownership" + assert ( + mock_emitter.method_calls[31][1][0].entityUrn + == 
f"urn:li:dataJob:({expected_dataflow_urn},__main__.transform)" + ) + assert mock_emitter.method_calls[32][1][0].aspectName == "globalTags" + assert ( + mock_emitter.method_calls[32][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.transform)" + ) + assert ( + mock_emitter.method_calls[32][1][0].aspect.tags[0].tag + == f"urn:li:tag:{task_run_ctx.task.tags[0]}" + ) + assert mock_emitter.method_calls[33][1][0].aspectName == "browsePaths" + assert ( + mock_emitter.method_calls[33][1][0].entityUrn + == f"urn:li:dataJob:({expected_dataflow_urn},__main__.transform)" + ) + assert ( + mock_emitter.method_calls[34][1][0].aspectName + == "dataProcessInstanceProperties" + ) + assert ( + mock_emitter.method_calls[34][1][0].entityUrn + == "urn:li:dataProcessInstance:04ba0f8064b2c45f69da571c434f1c69" + ) + assert ( + mock_emitter.method_calls[35][1][0].aspectName + == "dataProcessInstanceRelationships" + ) + assert ( + mock_emitter.method_calls[35][1][0].entityUrn + == "urn:li:dataProcessInstance:04ba0f8064b2c45f69da571c434f1c69" + ) + assert ( + mock_emitter.method_calls[36][1][0].aspectName == "dataProcessInstanceRunEvent" + ) + assert ( + mock_emitter.method_calls[36][1][0].entityUrn + == "urn:li:dataProcessInstance:04ba0f8064b2c45f69da571c434f1c69" + ) + assert ( + mock_emitter.method_calls[37][1][0].aspectName == "dataProcessInstanceRunEvent" + ) + assert ( + mock_emitter.method_calls[37][1][0].entityUrn + == "urn:li:dataProcessInstance:04ba0f8064b2c45f69da571c434f1c69" + ) From a6f2a5ec975d1002ed453682b5ee7856c25fe7f8 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Thu, 13 Jul 2023 11:00:14 +0530 Subject: [PATCH 02/21] prefect-dataub package integrated with datahub --- .github/workflows/build-and-test.yml | 5 +-- docs-website/build.gradle | 3 +- docs-website/generateDocsDir.ts | 1 + docs-website/sidebars.js | 9 +++++ docs/lineage/prefect.md | 49 ++++++++++++++++++++++++++++ settings.gradle | 1 + 6 files changed, 65 insertions(+), 3 deletions(-) 
create mode 100644 docs/lineage/prefect.md diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index d266e1a7fd31f..3c2406df2c31d 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -26,9 +26,10 @@ jobs: matrix: command: [ - "./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :datahub-frontend:build -x :datahub-web-react:build --parallel", + "./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:prefect-datahub:build -x :datahub-frontend:build -x :datahub-web-react:build --parallel", "./gradlew :datahub-frontend:build :datahub-web-react:build --parallel", - "./gradlew :metadata-ingestion-modules:airflow-plugin:build --parallel" + "./gradlew :metadata-ingestion-modules:airflow-plugin:build --parallel", + "./gradlew :metadata-ingestion-modules:prefect-datahub:build --parallel" ] timezone: [ diff --git a/docs-website/build.gradle b/docs-website/build.gradle index 12f37033efc2f..5df8ea656234e 100644 --- a/docs-website/build.gradle +++ b/docs-website/build.gradle @@ -71,7 +71,8 @@ task yarnInstall(type: YarnTask) { task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall, generateGraphQLSchema, generateJsonSchema, ':metadata-ingestion:modelDocGen', ':metadata-ingestion:docGen', - ':metadata-ingestion:buildWheel', ':metadata-ingestion-modules:airflow-plugin:buildWheel'] ) { + ':metadata-ingestion:buildWheel', ':metadata-ingestion-modules:airflow-plugin:buildWheel', + ':metadata-ingestion-modules:prefect-datahub:buildWheel'] ) { inputs.files(projectMdFiles) outputs.cacheIf { true } args = ['run', 'generate'] diff --git 
a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts index ee6e6b586615c..1633d95ed9181 100644 --- a/docs-website/generateDocsDir.ts +++ b/docs-website/generateDocsDir.ts @@ -572,6 +572,7 @@ function copy_python_wheels(): void { const wheel_dirs = [ "../metadata-ingestion/dist", "../metadata-ingestion-modules/airflow-plugin/dist", + "../metadata-ingestion-modules/prefect-datahub/dist", ]; const wheel_output_directory = path.join(STATIC_DIRECTORY, "wheels"); diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 85fa61b88ab7e..f28c68d096f3e 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -91,6 +91,11 @@ module.exports = { id: "docs/lineage/airflow", label: "Airflow", }, + { + type: "doc", + id: "docs/lineage/prefect", + label: "Prefect", + }, //"docker/airflow/local_airflow", "metadata-integration/java/spark-lineage/README", @@ -614,6 +619,10 @@ module.exports = { //"docs/how/graph-onboarding", //"docs/demo/graph-onboarding", //"metadata-ingestion-modules/airflow-plugin/README" + //"metadata-ingestion-modules/prefect-datahub/README" + //"metadata-ingestion-modules/prefect-datahub/MAINTAINERS" + //"metadata-ingestion-modules/prefect-datahub/docs/concept_mapping" + //"metadata-ingestion-modules/prefect-datahub/docs/datahub_emitter" // "metadata-ingestion/schedule_docs/datahub", // we can delete this // TODO: change the titles of these, removing the "What is..." portion from the sidebar" // "docs/what/entity", diff --git a/docs/lineage/prefect.md b/docs/lineage/prefect.md new file mode 100644 index 0000000000000..95a033937aeb4 --- /dev/null +++ b/docs/lineage/prefect.md @@ -0,0 +1,49 @@ +# Prefect Integration + +DataHub supports integration of + +- Prefect flow and task metadata +- Flow run and Task run information as well as +- Lineage information when present + +## What is Prefect Datahub Block? 
+ +Blocks are primitives within Prefect that enable the storage of configuration and provide an interface for interacting with external systems. We integrated the [prefect-datahub](https://prefecthq.github.io/prefect-datahub/) block, which uses the [Datahub Rest](../../metadata-ingestion/sink_docs/datahub.md#datahub-rest) emitter to emit metadata events while running a Prefect flow. + +## Prerequisites to use Prefect Datahub Block + +1. You need to use either Prefect Cloud (recommended) or a self-hosted Prefect server. +2. Refer [Cloud Quickstart](https://docs.prefect.io/2.10.13/cloud/cloud-quickstart/) to set up Prefect Cloud. +3. Refer [Host Prefect server](https://docs.prefect.io/2.10.13/host/) to set up a self-hosted Prefect server. +4. Make sure the Prefect API URL is set correctly. You can check it by running the below command: +```shell +prefect profile inspect +``` +5. If you are using Prefect Cloud, the API URL should be set as `https://api.prefect.cloud/api/accounts//workspaces/`. +6. If you are using a self-hosted Prefect server, the API URL should be set as `http://:/api`. + +## Setup + +For setup detail please refer [prefct-datahub](https://prefecthq.github.io/prefect-datahub/). + +## How to validate saved block and emit of metadata + +1. Go and check in Prefect UI at Blocks menu if you can see the datahub emitter. +2. Run a Prefect workflow. In the flow logs, you should see Datahub related log messages like: + +``` +Emitting flow to datahub... +Emitting tasks to datahub... +``` +## Debugging + +### Incorrect Prefect API URL + +If your Prefect API URL isn't being generated correctly or is set incorrectly, you can set the Prefect API URL manually as shown below: + +```shell +prefect config set PREFECT_API_URL='http://127.0.0.1:4200/api' +``` + +### Connection error for Datahub Rest URL +If you get ConnectionError: HTTPConnectionPool(host='localhost', port=8080), then in that case your GMS service is not up.
\ No newline at end of file diff --git a/settings.gradle b/settings.gradle index e4d1702829bc8..107f1f10c4f7b 100644 --- a/settings.gradle +++ b/settings.gradle @@ -54,6 +54,7 @@ include 'metadata-integration:java:datahub-client' include 'metadata-integration:java:datahub-protobuf' include 'ingestion-scheduler' include 'metadata-ingestion-modules:airflow-plugin' +include 'metadata-ingestion-modules:prefect-datahub' include 'smoke-test' include 'metadata-auth:auth-api' include 'metadata-service:schema-registry-api' From e84c1ac7cf79b5033ca0fbf966b2d7ed0f65d9b5 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Thu, 13 Jul 2023 11:11:47 +0530 Subject: [PATCH 03/21] Prefect doc Spell mistake corrected --- docs/lineage/prefect.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/lineage/prefect.md b/docs/lineage/prefect.md index 95a033937aeb4..606f672405079 100644 --- a/docs/lineage/prefect.md +++ b/docs/lineage/prefect.md @@ -24,11 +24,11 @@ prefect profile inspect ## Setup -For setup detail please refer [prefct-datahub](https://prefecthq.github.io/prefect-datahub/). +For setup details please refer [prefect-datahub](https://prefecthq.github.io/prefect-datahub/). ## How to validate saved block and emit of metadata -1. Go and check in Prefect UI at Blocks menu if you can see the datahub emitter. +1. Go and check in Prefect UI at the Blocks menu if you can see the datahub emitter. 2. Run a Prefect workflow. 
In the flow logs, you should see Datahub related log messages like: ``` From 2bef6cb34123db74061d8510cc5d2919a115daa9 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Thu, 13 Jul 2023 18:33:24 +0530 Subject: [PATCH 04/21] Remove not necessary md file --- docs-website/sidebars.js | 1 - .../prefect-datahub/MAINTAINERS.md | 114 ------------------ 2 files changed, 115 deletions(-) delete mode 100644 metadata-ingestion-modules/prefect-datahub/MAINTAINERS.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index f28c68d096f3e..5ba72ec8f35df 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -620,7 +620,6 @@ module.exports = { //"docs/demo/graph-onboarding", //"metadata-ingestion-modules/airflow-plugin/README" //"metadata-ingestion-modules/prefect-datahub/README" - //"metadata-ingestion-modules/prefect-datahub/MAINTAINERS" //"metadata-ingestion-modules/prefect-datahub/docs/concept_mapping" //"metadata-ingestion-modules/prefect-datahub/docs/datahub_emitter" // "metadata-ingestion/schedule_docs/datahub", // we can delete this diff --git a/metadata-ingestion-modules/prefect-datahub/MAINTAINERS.md b/metadata-ingestion-modules/prefect-datahub/MAINTAINERS.md deleted file mode 100644 index b58c764f875c2..0000000000000 --- a/metadata-ingestion-modules/prefect-datahub/MAINTAINERS.md +++ /dev/null @@ -1,114 +0,0 @@ -# prefect-datahub - -## Getting Started - -Now that you've bootstrapped a project, follow the steps below to get started developing your Prefect Collection! - -### Python setup - -Requires an installation of Python 3.7+ - -We recommend using a Python virtual environment manager such as pipenv, conda or virtualenv. - -### GitHub setup - -Create a Git respoitory for the newly generated collection and create the first commit: - -```bash -git init -git add . 
-git commit -m "Initial commit: project generated by prefect-collection-template" -``` - -Then, create a new repo following the prompts at: -https://github.com/organizations/shubhamjagtap639/repositories/new - -Upon creation, push the repository to GitHub: -```bash -git remote add origin https://github.com/shubhamjagtap639/prefect-datahub.git -git branch -M main -git push -u origin main -``` - -It's recommended to setup some protection rules for main at: -https://github.com/shubhamjagtap639/prefect-datahub/settings/branches - -- Require a pull request before merging -- Require approvals - -Lastly, [code owners](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners) for the repository can be set, like this [example here](https://github.com/PrefectHQ/prefect/blob/master/.github/CODEOWNERS). - -### Project setup - -To setup your project run the following: - -```bash -# Create an editable install of your project -pip install -e ".[dev]" - -# Configure pre-commit hooks -pre-commit install -``` - -To verify the setup was successful you can run the following: - -- Run the tests for tasks and flows in the collection: - ```bash - pytest tests - ``` -- Serve the docs with `mkdocs`: - ```bash - mkdocs serve - ``` - -## Developing tasks and flows - -For information about the use and development of tasks and flow, check out the [flows](https://docs.prefect.io/concepts/flows/) and [tasks](https://docs.prefect.io/concepts/tasks/) concepts docs in the Prefect docs. - -## Writing documentation - -This collection has been setup to with [mkdocs](https://www.mkdocs.org/) for automatically generated documentation. The signatures and docstrings of your tasks and flow will be used to generate documentation for the users of this collection. You can make changes to the structure of the generated documentation by editing the `mkdocs.yml` file in this project. 
- -To add a new page for a module in your collection, create a new markdown file in the `docs` directory and add that file to the `nav` section of `mkdocs.yml`. If you want to automatically generate documentation based on the docstrings and signatures of the contents of the module with `mkdocstrings`, add a line to the new markdown file in the following format: - -```markdown -::: prefect_datahub.{module_name} -``` - -You can also refer to the `flows.md` and `tasks.md` files included in your generated project as examples. - -Once you have working code, replace the default "Write and run a flow" example in `README.md` to match your collection. - -## Development lifecycle - -### CI Pipeline - -This collection comes with [GitHub Actions](https://docs.github.com/en/actions) for testing and linting. To add additional actions, you can add jobs in the `.github/workflows` folder. Upon a pull request, the pipeline will run linting via [`black`](https://black.readthedocs.io/en/stable/), [`flake8`](https://flake8.pycqa.org/en/latest/), [`interrogate`](https://interrogate.readthedocs.io/en/latest/), and unit tests via `pytest` alongside `coverage`. - -`interrogate` will tell you which methods, functions, classes, and modules have docstrings, and which do not--the job has a fail threshold of 95%, meaning that it will fail if more than 5% of the codebase is undocumented. We recommend following the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for docstring format. - -Simiarly, `coverage` ensures that the codebase includes tests--the job has a fail threshold of 80%, meaning that it will fail if more than 20% of the codebase is missing tests. - -### Track Issues on Project Board - -To automatically add issues to a GitHub Project Board, you'll need a [secret added](https://docs.github.com/en/actions/security-guides/encrypted-secrets#creating-encrypted-secrets-for-an-environment) to the repository. 
Specifically, a secret named `ADD_TO_PROJECT_URL`, formatted like `https://github.com/orgs//projects/`. - -### Package and Publish - -GitHub actions will handle packaging and publishing of your collection to [PyPI](https://pypi.org/) so other Prefect users can your collection in their flows. - -To publish to PyPI, you'll need a PyPI account and to generate an API token to authenticate with PyPI when publishing new versions of your collection. The [PyPI documentation](https://pypi.org/help/#apitoken) outlines the steps needed to get an API token. - -Once you've obtained a PyPI API token, [create a GitHub secret](https://docs.github.com/en/actions/security-guides/encrypted-secrets#creating-encrypted-secrets-for-a-repository) named `PYPI_API_TOKEN`. - -To publish a new version of your collection, [create a new GitHub release](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository#creating-a-release) and tag it with the version that you want to deploy (e.g. v0.3.2). This will trigger a workflow to publish the new version on PyPI and deploy the updated docs to GitHub pages. - -Upon publishing, a `docs` branch is automatically created. To hook this up to GitHub Pages, simply head over to https://github.com/shubhamjagtap639/prefect-datahub/settings/pages, select `docs` under the dropdown menu, keep the default `/root` folder, `Save`, and upon refresh, you should see a prompt stating "Your site is published at https://shubhamjagtap639.github.io/prefect-datahub". Don't forget to add this link to the repo's "About" section, under "Website" so users can access the docs easily. - -Feel free to [submit your collection](https://docs.prefect.io/collections/overview/#listing-in-the-collections-catalog) to the Prefect [Collections Catalog](https://docs.prefect.io/collections/catalog/)! 
- -## Further guidance - -If you run into any issues during the bootstrapping process, feel free to open an issue in the [prefect-collection-template](https://github.com/PrefectHQ/prefect-collection-template) repository. - -If you have any questions or issues while developing your collection, you can find help in either the [Prefect Discourse forum](https://discourse.prefect.io/) or the [Prefect Slack community](https://prefect.io/slack). From 0910f0932b9ea8244618be0fec7a9ca5b9fd669c Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Fri, 14 Jul 2023 11:49:29 +0530 Subject: [PATCH 05/21] Version added for some pakages in prefect-datahub --- .../prefect-datahub/requirements-dev.txt | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt index be4d2406f2975..164e800691abc 100644 --- a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt +++ b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt @@ -1,16 +1,17 @@ -pytest -black -flake8 -mypy +pytest>=6.2.2 +black>=21.12b0 +flake8>=3.8.3 +flake8-tidy-imports>=4.3.0 +mypy>=0.920 mkdocs mkdocs-material mkdocstrings[python] -isort +isort>=5.7.0 pre-commit -pytest-asyncio +pytest-asyncio>=0.16.0 mock; python_version < '3.8' mkdocs-gen-files interrogate -coverage +coverage>=5.1 pillow types-requests \ No newline at end of file From 5efc31e84ffb9a4799b8e1fdfd033d3671aa8b4f Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Fri, 14 Jul 2023 16:11:54 +0530 Subject: [PATCH 06/21] Prefect version 2.0.0 restriction removed --- metadata-ingestion-modules/prefect-datahub/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion-modules/prefect-datahub/requirements.txt b/metadata-ingestion-modules/prefect-datahub/requirements.txt index db5c355c97f8a..e1672645d3c67 100644 --- 
a/metadata-ingestion-modules/prefect-datahub/requirements.txt +++ b/metadata-ingestion-modules/prefect-datahub/requirements.txt @@ -1,2 +1,2 @@ -prefect>=2.0.0 +prefect acryl-datahub[datahub-rest] \ No newline at end of file From 43bf87f522b6170bc207d0ba4b030507be930f8b Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Mon, 17 Jul 2023 16:57:27 +0530 Subject: [PATCH 07/21] Prefect version set to >=2.0.0 --- metadata-ingestion-modules/prefect-datahub/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion-modules/prefect-datahub/requirements.txt b/metadata-ingestion-modules/prefect-datahub/requirements.txt index e1672645d3c67..db5c355c97f8a 100644 --- a/metadata-ingestion-modules/prefect-datahub/requirements.txt +++ b/metadata-ingestion-modules/prefect-datahub/requirements.txt @@ -1,2 +1,2 @@ -prefect +prefect>=2.0.0 acryl-datahub[datahub-rest] \ No newline at end of file From 8b027a1a8e03941feae081c6d9a0d93262d90597 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Mon, 17 Jul 2023 18:47:06 +0530 Subject: [PATCH 08/21] prefect-datahub build error fixed for python 3.7 --- .../prefect-datahub/requirements-dev.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt index 164e800691abc..ec82c9f9f6a2c 100644 --- a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt +++ b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt @@ -14,4 +14,6 @@ mkdocs-gen-files interrogate coverage>=5.1 pillow -types-requests \ No newline at end of file +types-requests +# For python 3.7 and importlib-metadata>=5.0.0, build failed with attribute error +importlib-metadata>=4.4.0,<5.0.0; python_version < '3.8' \ No newline at end of file From a4ed11a7f9670c55dffbbd8f1c86c78682f711f8 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Mon, 17 Jul 2023 20:00:42 +0530 
Subject: [PATCH 09/21] mypy stubs packages added --- .../prefect-datahub/requirements-dev.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt index ec82c9f9f6a2c..3e84f17100252 100644 --- a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt +++ b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt @@ -14,6 +14,20 @@ mkdocs-gen-files interrogate coverage>=5.1 pillow +# mypy stubs packages +types-dataclasses +sqlalchemy-stubs +types-six +types-python-dateutil types-requests +types-toml +types-PyYAML +types-freezegun +types-cachetools +# versions 0.1.13 and 0.1.14 seem to have issues +types-click==0.1.12 +types-tabulate +# avrogen package requires this +types-pytz # For python 3.7 and importlib-metadata>=5.0.0, build failed with attribute error importlib-metadata>=4.4.0,<5.0.0; python_version < '3.8' \ No newline at end of file From 0a7d63efaaf64624fb90867e012dcc95f01a4da0 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Tue, 18 Jul 2023 11:30:01 +0530 Subject: [PATCH 10/21] acryl-datahub package added --- metadata-ingestion-modules/prefect-datahub/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion-modules/prefect-datahub/requirements.txt b/metadata-ingestion-modules/prefect-datahub/requirements.txt index db5c355c97f8a..be3a952264ef5 100644 --- a/metadata-ingestion-modules/prefect-datahub/requirements.txt +++ b/metadata-ingestion-modules/prefect-datahub/requirements.txt @@ -1,2 +1,2 @@ prefect>=2.0.0 -acryl-datahub[datahub-rest] \ No newline at end of file +acryl-datahub \ No newline at end of file From 1f5b9caeb5228096991dd016702cab6b731e72cd Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Tue, 18 Jul 2023 13:58:37 +0530 Subject: [PATCH 11/21] Added some missing required setup files --- .../prefect-datahub/pyproject.toml | 20 ++++ 
.../prefect-datahub/setup.cfg | 91 +++++++++++++------ .../prefect-datahub/setup.py | 4 +- .../prefect-datahub/tests/conftest.py | 27 ++++-- .../tests/test_block_standards.py | 12 ++- .../tests/test_datahub_emitter.py | 7 +- .../prefect-datahub/tox.ini | 35 +++++++ 7 files changed, 146 insertions(+), 50 deletions(-) create mode 100644 metadata-ingestion-modules/prefect-datahub/pyproject.toml create mode 100644 metadata-ingestion-modules/prefect-datahub/tox.ini diff --git a/metadata-ingestion-modules/prefect-datahub/pyproject.toml b/metadata-ingestion-modules/prefect-datahub/pyproject.toml new file mode 100644 index 0000000000000..83b79e3146176 --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/pyproject.toml @@ -0,0 +1,20 @@ +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"] + +[tool.black] +extend-exclude = ''' +# A regex preceded with ^/ will apply only to files and directories +# in the root of the project. +^/tmp +''' +include = '\.pyi?$' +target-version = ['py36', 'py37', 'py38'] + +[tool.isort] +indent = ' ' +profile = 'black' +sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' + +[tool.pyright] +extraPaths = ['tests'] \ No newline at end of file diff --git a/metadata-ingestion-modules/prefect-datahub/setup.cfg b/metadata-ingestion-modules/prefect-datahub/setup.cfg index 17d7e84c47415..2232f9acd13df 100644 --- a/metadata-ingestion-modules/prefect-datahub/setup.cfg +++ b/metadata-ingestion-modules/prefect-datahub/setup.cfg @@ -1,39 +1,70 @@ [flake8] -exclude = .git,__pycache__,build,dist +max-complexity = 15 +ignore = + # Ignore: line length issues, since black's formatter will take care of them. + E501, + # Ignore: 1 blank line required before class docstring. + D203, + # See https://stackoverflow.com/a/57074416. + W503, + # See https://github.com/psf/black/issues/315. 
+ E203 +exclude = + .git, + venv, + .tox, + __pycache__ per-file-ignores = - setup.py:E501 -# Match black line-length -max-line-length = 88 -extend-ignore = - E203, + # imported but unused + __init__.py: F401 +ban-relative-imports = true -[isort] -skip = __init__.py -profile = black -skip_gitignore = True -multi_line_output = 3 +[mypy] +plugins = + sqlmypy, + pydantic.mypy +exclude = ^(venv|build|dist)/ +ignore_missing_imports = yes +strict_optional = yes +check_untyped_defs = yes +disallow_incomplete_defs = yes +disallow_untyped_decorators = yes +warn_unused_configs = yes +# eventually we'd like to enable these +disallow_untyped_defs = no -[versioneer] -VCS = git -style = pep440 -versionfile_source = prefect_datahub/_version.py -versionfile_build = prefect_datahub/_version.py -tag_prefix = v -parentdir_prefix = +# try to be a bit more strict in certain areas of the codebase +[mypy-datahub.*] +ignore_missing_imports = no +[mypy-tests.*] +ignore_missing_imports = no -[tool:interrogate] -ignore-init-module = True -ignore_init_method = True -exclude = prefect_datahub/_version.py, tests, setup.py, versioneer.py, docs, site -fail-under = 95 -omit-covered-files = True +[tool:pytest] +asyncio_mode = auto + +testpaths = + tests [coverage:run] -omit = tests/*, prefect_datahub/_version.py +# Because of some quirks in the way setup.cfg, coverage.py, pytest-cov, +# and tox interact, we should not uncomment the following line. +# See https://pytest-cov.readthedocs.io/en/latest/config.html and +# https://coverage.readthedocs.io/en/coverage-5.0/config.html. +# We also have some additional pytest/cov config options in tox.ini. +# source = prefect_datahub -[coverage:report] -fail_under = 80 -show_missing = True +[coverage:paths] +# This is necessary for tox-based coverage to be counted properly. +source = + prefect_datahub + */site-packages -[tool:pytest] -asyncio_mode = auto +[coverage:report] +# The fail_under value ensures that at least some coverage data is collected. 
+# We override its value in the tox config. +show_missing = true +exclude_lines = + pragma: no cover + @abstract + if TYPE_CHECKING: +#omit = diff --git a/metadata-ingestion-modules/prefect-datahub/setup.py b/metadata-ingestion-modules/prefect-datahub/setup.py index ebe484ce4c7a5..9ff01aa9a7632 100644 --- a/metadata-ingestion-modules/prefect-datahub/setup.py +++ b/metadata-ingestion-modules/prefect-datahub/setup.py @@ -29,8 +29,8 @@ install_requires=install_requires, extras_require={"dev": dev_requires}, entry_points={ - "prefect.collections": [ - "prefect_datahub = prefect_datahub", + "prefect.datahub": [ + "prefect_datahub = prefect_datahub.datahub_emitter:DatahubEmitter", ] }, classifiers=[ diff --git a/metadata-ingestion-modules/prefect-datahub/tests/conftest.py b/metadata-ingestion-modules/prefect-datahub/tests/conftest.py index ee0fabc712966..e22c46f043098 100644 --- a/metadata-ingestion-modules/prefect-datahub/tests/conftest.py +++ b/metadata-ingestion-modules/prefect-datahub/tests/conftest.py @@ -1,7 +1,7 @@ import asyncio import json import logging -from typing import Dict, List +from typing import Dict, List, cast from unittest.mock import MagicMock, patch from uuid import UUID @@ -9,6 +9,7 @@ from prefect.client.schemas import FlowRun, TaskRun, Workspace from prefect.futures import PrefectFuture from prefect.server.schemas.core import Flow +from prefect.task_runners import SequentialTaskRunner from requests.models import Response mock_transform_task_json: Dict = { @@ -369,24 +370,30 @@ async def mock_task_run_future(): - extract_prefect_future = PrefectFuture( + extract_prefect_future: PrefectFuture = PrefectFuture( name=mock_extract_task_run_json["name"], key=UUID("4552629a-ac04-4590-b286-27642292739f"), - task_runner=None, + task_runner=SequentialTaskRunner(), ) - extract_prefect_future.task_run = TaskRun.parse_obj(mock_extract_task_run_json) - transform_prefect_future = PrefectFuture( + extract_prefect_future.task_run = cast( + None, 
TaskRun.parse_obj(mock_extract_task_run_json) + ) + transform_prefect_future: PrefectFuture = PrefectFuture( name=mock_transform_task_run_json["name"], key=UUID("40fff3e5-5ef4-4b8b-9cc8-786f91bcc656"), - task_runner=None, + task_runner=SequentialTaskRunner(), + ) + transform_prefect_future.task_run = cast( + None, TaskRun.parse_obj(mock_transform_task_run_json) ) - transform_prefect_future.task_run = TaskRun.parse_obj(mock_transform_task_run_json) - load_prefect_future = PrefectFuture( + load_prefect_future: PrefectFuture = PrefectFuture( name=mock_load_task_run_json["name"], key=UUID("7565f596-9eb0-4330-ba34-963e7839883e"), - task_runner=None, + task_runner=SequentialTaskRunner(), + ) + load_prefect_future.task_run = cast( + None, TaskRun.parse_obj(mock_load_task_run_json) ) - load_prefect_future.task_run = TaskRun.parse_obj(mock_load_task_run_json) return [extract_prefect_future, transform_prefect_future, load_prefect_future] diff --git a/metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py b/metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py index 496c128309786..8c276bb6b393b 100644 --- a/metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py +++ b/metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py @@ -7,11 +7,13 @@ def find_module_blocks(): blocks = get_registry_for_type(Block) - module_blocks = [ - block - for block in blocks.values() - if to_qualified_name(block).startswith("prefect_datahub") - ] + module_blocks = [] + if blocks is not None: + module_blocks = [ + block + for block in blocks.values() + if to_qualified_name(block).startswith("prefect_datahub") + ] return module_blocks diff --git a/metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py b/metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py index e294374a149e1..e4499f3215b9a 100644 --- a/metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py +++ 
b/metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py @@ -1,9 +1,10 @@ import asyncio +from typing import List, Optional from unittest.mock import Mock, patch from datahub.api.entities.datajob import DataJob from datahub.utilities.urns.dataset_urn import DatasetUrn -from datahub_provider.entities import Dataset +from datahub_provider.entities import Dataset, _Entity from prefect_datahub.datahub_emitter import DatahubEmitter @@ -37,8 +38,8 @@ def test_add_task(mock_emit, mock_run_context): mock_emit.return_value = mock_emitter datahub_emitter = DatahubEmitter() - inputs = [Dataset("snowflake", "mydb.schema.tableA")] - outputs = [Dataset("snowflake", "mydb.schema.tableC")] + inputs: Optional[List[_Entity]] = [Dataset("snowflake", "mydb.schema.tableA")] + outputs: Optional[List[_Entity]] = [Dataset("snowflake", "mydb.schema.tableC")] datahub_emitter.add_task( inputs=inputs, outputs=outputs, diff --git a/metadata-ingestion-modules/prefect-datahub/tox.ini b/metadata-ingestion-modules/prefect-datahub/tox.ini new file mode 100644 index 0000000000000..0b8118e2d3f1f --- /dev/null +++ b/metadata-ingestion-modules/prefect-datahub/tox.ini @@ -0,0 +1,35 @@ +# tox (https://tox.readthedocs.io/) is a tool for running tests +# in multiple virtualenvs. This configuration file will run the +# test suite on all supported python versions. To use it, "pip install tox" +# and then run "tox" from this directory. + +[tox] +envlist = py3-quick,py3-full + +[gh-actions] +python = + 3.6: py3-full + 3.9: py3-full + +# Providing optional features that add dependencies from setup.py as deps here +# allows tox to recreate testenv when new dependencies are added to setup.py. +# Previous approach of using the tox global setting extras is not recommended +# as extras is only called when the testenv is created for the first time! 
+# see more here -> https://github.com/tox-dev/tox/issues/1105#issuecomment-448596282 + +[testenv] +deps = + -e ../../metadata-ingestion/[.dev] +commands = + pytest --cov={envsitepackagesdir}/datahub --cov={envsitepackagesdir}/datahub_provider \ + py3-quick: -m 'not integration and not slow_integration' --junit-xml=junit.quick.xml \ + py3-full: --cov-fail-under 65 --junit-xml=junit.full.xml \ + --continue-on-collection-errors \ + -vv + +setenv = + PREFECT_HOME = /tmp/prefect/thisshouldnotexist-{envname} + +[testenv:py3-full] +deps = + ../../metadata-ingestion/.[dev] From 39763238e0fa36f15cc21cd20dc6954879a140ec Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Tue, 18 Jul 2023 14:09:47 +0530 Subject: [PATCH 12/21] Extra packages added in requirements-dev --- .../prefect-datahub/requirements-dev.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt index 3e84f17100252..dc2fc4bc350a9 100644 --- a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt +++ b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt @@ -14,6 +14,11 @@ mkdocs-gen-files interrogate coverage>=5.1 pillow +dataclasses>=0.6; python_version < '3.7' +typing_extensions>=3.10.0.2 +mypy_extensions>=0.4.3 +typing-inspect +pydantic>=1.5.1 # mypy stubs packages types-dataclasses sqlalchemy-stubs From 70c298ad4e9f748cbbd57a5e3d2576f776457d44 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Tue, 18 Jul 2023 14:35:42 +0530 Subject: [PATCH 13/21] Added some extra packages --- .../prefect-datahub/requirements-dev.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt index dc2fc4bc350a9..4b3f59d7a9daa 100644 --- a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt +++ 
b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt @@ -19,6 +19,14 @@ typing_extensions>=3.10.0.2 mypy_extensions>=0.4.3 typing-inspect pydantic>=1.5.1 +tox +deepdiff +requests-mock +freezegun +jsonpickle +build +twine +packaging # mypy stubs packages types-dataclasses sqlalchemy-stubs From a14220386c5bd8ec1d85e6ab824da0c0aaaf309b Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Tue, 18 Jul 2023 15:36:57 +0530 Subject: [PATCH 14/21] temp changes --- metadata-ingestion-modules/prefect-datahub/setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metadata-ingestion-modules/prefect-datahub/setup.cfg b/metadata-ingestion-modules/prefect-datahub/setup.cfg index 2232f9acd13df..34796625504c6 100644 --- a/metadata-ingestion-modules/prefect-datahub/setup.cfg +++ b/metadata-ingestion-modules/prefect-datahub/setup.cfg @@ -38,6 +38,8 @@ disallow_untyped_defs = no ignore_missing_imports = no [mypy-tests.*] ignore_missing_imports = no +[mypy-datahub.metadata.*] +ignore_missing_imports = yes [tool:pytest] asyncio_mode = auto From 66124582db0a73b280d515d48b562dc2ae94f473 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Tue, 18 Jul 2023 16:28:58 +0530 Subject: [PATCH 15/21] Revert temp changes --- metadata-ingestion-modules/prefect-datahub/setup.cfg | 2 -- 1 file changed, 2 deletions(-) diff --git a/metadata-ingestion-modules/prefect-datahub/setup.cfg b/metadata-ingestion-modules/prefect-datahub/setup.cfg index 34796625504c6..2232f9acd13df 100644 --- a/metadata-ingestion-modules/prefect-datahub/setup.cfg +++ b/metadata-ingestion-modules/prefect-datahub/setup.cfg @@ -38,8 +38,6 @@ disallow_untyped_defs = no ignore_missing_imports = no [mypy-tests.*] ignore_missing_imports = no -[mypy-datahub.metadata.*] -ignore_missing_imports = yes [tool:pytest] asyncio_mode = auto From 2075ac2685f123f22fc82f19e0d7987a63ae1ba0 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Wed, 13 Sep 2023 11:58:40 +0530 Subject: [PATCH 16/21] Prefect plugin code modified as 
per latest airflow plugin code --- .github/workflows/build-and-test.yml | 4 +- .github/workflows/prefect-plugin.yml | 78 ++++++++++ .github/workflows/test-results.yml | 2 +- docs-website/build.gradle | 2 +- docs-website/generateDocsDir.ts | 2 +- docs-website/sidebars.js | 6 +- docs/lineage/prefect.md | 2 +- .../prefect-datahub/MANIFEST.in | 14 -- .../prefect-datahub/requirements-dev.txt | 46 ------ .../prefect-datahub/requirements.txt | 2 - .../prefect-datahub/setup.py | 48 ------ .../tests/test_block_standards.py | 24 --- .../.gitignore | 2 +- .../README.md | 42 ++---- .../build.gradle | 72 +++++---- .../docs/concept_mapping.md | 0 .../docs/datahub_emitter.md | 0 .../docs/gen_blocks_catalog.py | 3 +- .../docs/gen_examples_catalog.py | 0 .../docs/gen_home_page.py | 0 .../docs/img/favicon.ico | Bin .../img/prefect-logo-mark-solid-white-500.png | Bin .../docs/img/prefect-logo-white.png | Bin .../integrations/analytics/custom.html | 0 .../docs/stylesheets/extra.css | 0 .../mkdocs.yml | 6 +- .../pyproject.toml | 1 - .../scripts/release.sh | 8 +- .../setup.cfg | 12 +- .../prefect-plugin/setup.py | 138 ++++++++++++++++++ .../src}/prefect_datahub/__init__.py | 4 +- .../src}/prefect_datahub/datahub_emitter.py | 20 +-- .../src/prefect_datahub/dataset.py | 46 ++++++ .../src/prefect_datahub/example/__init__.py | 0 .../src/prefect_datahub/example/flow.py | 32 ++++ .../src/prefect_datahub/example/save_block.py | 7 + .../integration/integration_test_dummy.py | 2 + .../tests/unit}/conftest.py | 0 .../tests/unit/test_block_standards.py | 45 ++++++ .../tests/unit}/test_datahub_emitter.py | 2 +- .../tox.ini | 0 metadata-ingestion/developing.md | 10 ++ settings.gradle | 2 +- 43 files changed, 451 insertions(+), 233 deletions(-) create mode 100644 .github/workflows/prefect-plugin.yml delete mode 100644 metadata-ingestion-modules/prefect-datahub/MANIFEST.in delete mode 100644 metadata-ingestion-modules/prefect-datahub/requirements-dev.txt delete mode 100644 
metadata-ingestion-modules/prefect-datahub/requirements.txt delete mode 100644 metadata-ingestion-modules/prefect-datahub/setup.py delete mode 100644 metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/.gitignore (97%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/README.md (67%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/build.gradle (58%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/docs/concept_mapping.md (100%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/docs/datahub_emitter.md (100%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/docs/gen_blocks_catalog.py (95%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/docs/gen_examples_catalog.py (100%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/docs/gen_home_page.py (100%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/docs/img/favicon.ico (100%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/docs/img/prefect-logo-mark-solid-white-500.png (100%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/docs/img/prefect-logo-white.png (100%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/docs/overrides/partials/integrations/analytics/custom.html (100%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/docs/stylesheets/extra.css (100%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/mkdocs.yml (92%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/pyproject.toml (90%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/scripts/release.sh (63%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/setup.cfg (88%) create mode 100644 metadata-ingestion-modules/prefect-plugin/setup.py 
rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin/src}/prefect_datahub/__init__.py (78%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin/src}/prefect_datahub/datahub_emitter.py (97%) create mode 100644 metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/dataset.py create mode 100644 metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/__init__.py create mode 100644 metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/flow.py create mode 100644 metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/save_block.py create mode 100644 metadata-ingestion-modules/prefect-plugin/tests/integration/integration_test_dummy.py rename metadata-ingestion-modules/{prefect-datahub/tests => prefect-plugin/tests/unit}/conftest.py (100%) create mode 100644 metadata-ingestion-modules/prefect-plugin/tests/unit/test_block_standards.py rename metadata-ingestion-modules/{prefect-datahub/tests => prefect-plugin/tests/unit}/test_datahub_emitter.py (99%) rename metadata-ingestion-modules/{prefect-datahub => prefect-plugin}/tox.ini (100%) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 0e02ed35eaaca..90d1c52537bcf 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -26,10 +26,10 @@ jobs: matrix: command: [ - "./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:prefect-datahub:build -x :datahub-frontend:build -x :datahub-web-react:build --parallel", + "./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x 
:metadata-ingestion-modules:prefect-plugin:build -x :datahub-frontend:build -x :datahub-web-react:build --parallel", "./gradlew :datahub-frontend:build :datahub-web-react:build --parallel", "./gradlew :metadata-ingestion-modules:airflow-plugin:build --parallel", - "./gradlew :metadata-ingestion-modules:prefect-datahub:build --parallel" + "./gradlew :metadata-ingestion-modules:prefect-plugin:build --parallel" ] timezone: [ diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml new file mode 100644 index 0000000000000..18cbd79f1156c --- /dev/null +++ b/.github/workflows/prefect-plugin.yml @@ -0,0 +1,78 @@ +name: Prefect Plugin +on: + push: + branches: + - master + paths: + - ".github/workflows/prefect-plugin.yml" + - "metadata-ingestion-modules/prefect-plugin/**" + - "metadata-ingestion/**" + - "metadata-models/**" + pull_request: + branches: + - master + paths: + - ".github/**" + - "metadata-ingestion-modules/prefect-plugin/**" + - "metadata-ingestion/**" + - "metadata-models/**" + release: + types: [published] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + prefect-plugin: + runs-on: ubuntu-latest + env: + SPARK_VERSION: 3.0.3 + DATAHUB_TELEMETRY_ENABLED: false + strategy: + matrix: + python-version: ["3.7", "3.10"] + include: + - python-version: "3.7" + - python-version: "3.10" + fail-fast: false + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - name: Install dependencies + run: ./metadata-ingestion/scripts/install_deps.sh + - name: Install prefect package and test (extras ${{ matrix.extraPythonRequirement }}) + run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:prefect-plugin:lint :metadata-ingestion-modules:prefect-plugin:testQuick + - name: pip freeze show list installed + if: always() + 
run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && pip freeze + - uses: actions/upload-artifact@v3 + if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'prefect>=2.0.0' }} + with: + name: Test Results (Prefect Plugin ${{ matrix.python-version}}) + path: | + **/build/reports/tests/test/** + **/build/test-results/test/** + **/junit.*.xml + - name: Upload coverage to Codecov + if: always() + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + directory: . + fail_ci_if_error: false + flags: prefect-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }} + name: pytest-prefect + verbose: true + + event-file: + runs-on: ubuntu-latest + steps: + - name: Upload + uses: actions/upload-artifact@v3 + with: + name: Event File + path: ${{ github.event_path }} diff --git a/.github/workflows/test-results.yml b/.github/workflows/test-results.yml index 0153060692271..cdb89b1fa6b5d 100644 --- a/.github/workflows/test-results.yml +++ b/.github/workflows/test-results.yml @@ -2,7 +2,7 @@ name: Test Results on: workflow_run: - workflows: ["build & test", "metadata ingestion", "Airflow Plugin"] + workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Prefect Plugin"] types: - completed diff --git a/docs-website/build.gradle b/docs-website/build.gradle index 49f1031d8e3ef..22d5a8e79b955 100644 --- a/docs-website/build.gradle +++ b/docs-website/build.gradle @@ -72,7 +72,7 @@ task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall, generateGraphQLSchema, generateJsonSchema, ':metadata-ingestion:modelDocGen', ':metadata-ingestion:docGen', ':metadata-ingestion:buildWheel', ':metadata-ingestion-modules:airflow-plugin:buildWheel', - ':metadata-ingestion-modules:prefect-datahub:buildWheel'] ) { + ':metadata-ingestion-modules:prefect-plugin:buildWheel'] ) { inputs.files(projectMdFiles) outputs.cacheIf { true } args = ['run', 'generate'] diff --git a/docs-website/generateDocsDir.ts 
b/docs-website/generateDocsDir.ts index fc4619165fc2a..7724ccc89f332 100644 --- a/docs-website/generateDocsDir.ts +++ b/docs-website/generateDocsDir.ts @@ -573,7 +573,7 @@ function copy_python_wheels(): void { const wheel_dirs = [ "../metadata-ingestion/dist", "../metadata-ingestion-modules/airflow-plugin/dist", - "../metadata-ingestion-modules/prefect-datahub/dist", + "../metadata-ingestion-modules/prefect-plugin/dist", ]; const wheel_output_directory = path.join(STATIC_DIRECTORY, "wheels"); diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index cf2000d416f44..f27b11c7cd30c 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -643,9 +643,9 @@ module.exports = { //"docs/how/graph-onboarding", //"docs/demo/graph-onboarding", //"metadata-ingestion-modules/airflow-plugin/README" - //"metadata-ingestion-modules/prefect-datahub/README" - //"metadata-ingestion-modules/prefect-datahub/docs/concept_mapping" - //"metadata-ingestion-modules/prefect-datahub/docs/datahub_emitter" + //"metadata-ingestion-modules/prefect-plugin/README" + //"metadata-ingestion-modules/prefect-plugin/docs/concept_mapping" + //"metadata-ingestion-modules/prefect-plugin/docs/datahub_emitter" // "metadata-ingestion/schedule_docs/datahub", // we can delete this // TODO: change the titles of these, removing the "What is..." portion from the sidebar" // "docs/what/entity", diff --git a/docs/lineage/prefect.md b/docs/lineage/prefect.md index 606f672405079..76ffa2edca9f4 100644 --- a/docs/lineage/prefect.md +++ b/docs/lineage/prefect.md @@ -46,4 +46,4 @@ prefect config set PREFECT_API_URL='http://127.0.0.1:4200/api' ``` ### Connection error for Datahub Rest URL -If you get ConnectionError: HTTPConnectionPool(host='localhost', port=8080), then in that case your GMS service is not up. \ No newline at end of file +If you get ConnectionError: HTTPConnectionPool(host='localhost', port=8080), then in that case your GMS service is not up. 
diff --git a/metadata-ingestion-modules/prefect-datahub/MANIFEST.in b/metadata-ingestion-modules/prefect-datahub/MANIFEST.in deleted file mode 100644 index 9e3fb02f8f704..0000000000000 --- a/metadata-ingestion-modules/prefect-datahub/MANIFEST.in +++ /dev/null @@ -1,14 +0,0 @@ -# Things to always exclude -global-exclude .git* -global-exclude .ipynb_checkpoints -global-exclude *.py[co] -global-exclude __pycache__/** - -# Top-level Config -include versioneer.py -include prefect_datahub/_version.py -include LICENSE -include MANIFEST.in -include setup.cfg -include requirements.txt -include requirements-dev.txt diff --git a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt b/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt deleted file mode 100644 index 4b3f59d7a9daa..0000000000000 --- a/metadata-ingestion-modules/prefect-datahub/requirements-dev.txt +++ /dev/null @@ -1,46 +0,0 @@ -pytest>=6.2.2 -black>=21.12b0 -flake8>=3.8.3 -flake8-tidy-imports>=4.3.0 -mypy>=0.920 -mkdocs -mkdocs-material -mkdocstrings[python] -isort>=5.7.0 -pre-commit -pytest-asyncio>=0.16.0 -mock; python_version < '3.8' -mkdocs-gen-files -interrogate -coverage>=5.1 -pillow -dataclasses>=0.6; python_version < '3.7' -typing_extensions>=3.10.0.2 -mypy_extensions>=0.4.3 -typing-inspect -pydantic>=1.5.1 -tox -deepdiff -requests-mock -freezegun -jsonpickle -build -twine -packaging -# mypy stubs packages -types-dataclasses -sqlalchemy-stubs -types-six -types-python-dateutil -types-requests -types-toml -types-PyYAML -types-freezegun -types-cachetools -# versions 0.1.13 and 0.1.14 seem to have issues -types-click==0.1.12 -types-tabulate -# avrogen package requires this -types-pytz -# For python 3.7 and importlib-metadata>=5.0.0, build failed with attribute error -importlib-metadata>=4.4.0,<5.0.0; python_version < '3.8' \ No newline at end of file diff --git a/metadata-ingestion-modules/prefect-datahub/requirements.txt b/metadata-ingestion-modules/prefect-datahub/requirements.txt 
deleted file mode 100644 index be3a952264ef5..0000000000000 --- a/metadata-ingestion-modules/prefect-datahub/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -prefect>=2.0.0 -acryl-datahub \ No newline at end of file diff --git a/metadata-ingestion-modules/prefect-datahub/setup.py b/metadata-ingestion-modules/prefect-datahub/setup.py deleted file mode 100644 index 9ff01aa9a7632..0000000000000 --- a/metadata-ingestion-modules/prefect-datahub/setup.py +++ /dev/null @@ -1,48 +0,0 @@ -from setuptools import find_packages, setup - -package_metadata: dict = {} -with open("./prefect_datahub/__init__.py") as fp: - exec(fp.read(), package_metadata) - -with open("requirements.txt") as install_requires_file: - install_requires = install_requires_file.read().strip().split("\n") - -with open("requirements-dev.txt") as dev_requires_file: - dev_requires = dev_requires_file.read().strip().split("\n") - -with open("README.md") as readme_file: - readme = readme_file.read() - -setup( - name=package_metadata["__package_name__"], - version=package_metadata["__version__"], - description="Metadata emitter for datahub", - license="Apache License 2.0", - author="Acryl Data", - author_email="shubham.jagtap@gslab.com", - keywords="prefect", - url="https://github.com/PrefectHQ/prefect-datahub", - long_description=readme, - long_description_content_type="text/markdown", - packages=find_packages(exclude=("tests", "docs")), - python_requires=">=3.7", - install_requires=install_requires, - extras_require={"dev": dev_requires}, - entry_points={ - "prefect.datahub": [ - "prefect_datahub = prefect_datahub.datahub_emitter:DatahubEmitter", - ] - }, - classifiers=[ - "Natural Language :: English", - "Intended Audience :: Developers", - "Intended Audience :: System Administrators", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 
3.9", - "Programming Language :: Python :: 3.10", - "Topic :: Software Development :: Libraries", - ], -) diff --git a/metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py b/metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py deleted file mode 100644 index 8c276bb6b393b..0000000000000 --- a/metadata-ingestion-modules/prefect-datahub/tests/test_block_standards.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest -from prefect.blocks.core import Block -from prefect.testing.standard_test_suites import BlockStandardTestSuite -from prefect.utilities.dispatch import get_registry_for_type -from prefect.utilities.importtools import to_qualified_name - - -def find_module_blocks(): - blocks = get_registry_for_type(Block) - module_blocks = [] - if blocks is not None: - module_blocks = [ - block - for block in blocks.values() - if to_qualified_name(block).startswith("prefect_datahub") - ] - return module_blocks - - -@pytest.mark.parametrize("block", find_module_blocks()) -class TestAllBlocksAdhereToStandards(BlockStandardTestSuite): - @pytest.fixture - def block(self, block): - return block diff --git a/metadata-ingestion-modules/prefect-datahub/.gitignore b/metadata-ingestion-modules/prefect-plugin/.gitignore similarity index 97% rename from metadata-ingestion-modules/prefect-datahub/.gitignore rename to metadata-ingestion-modules/prefect-plugin/.gitignore index d0108e8361a06..1d2916d00eabd 100644 --- a/metadata-ingestion-modules/prefect-datahub/.gitignore +++ b/metadata-ingestion-modules/prefect-plugin/.gitignore @@ -1,5 +1,5 @@ .envrc -src/datahub_airflow_plugin/__init__.py.bak +src/prefect_datahub/__init__.py.bak .vscode/ output pvenv36/ diff --git a/metadata-ingestion-modules/prefect-datahub/README.md b/metadata-ingestion-modules/prefect-plugin/README.md similarity index 67% rename from metadata-ingestion-modules/prefect-datahub/README.md rename to metadata-ingestion-modules/prefect-plugin/README.md index 1aedba8c5ca90..2548221fb5591 
100644 --- a/metadata-ingestion-modules/prefect-datahub/README.md +++ b/metadata-ingestion-modules/prefect-plugin/README.md @@ -3,17 +3,15 @@

PyPI - - + + - - + +
- + - -

## Welcome! @@ -74,9 +72,8 @@ DatahubEmitter.load("BLOCK-NAME-PLACEHOLDER") After installing `prefect-datahub` and [saving the configution](#saving-configurations-to-a-block), you can easily use it within your prefect workflows to help you emit metadata event as show below! ```python -from datahub_provider.entities import Dataset from prefect import flow, task - +from prefect_datahub.dataset import Dataset from prefect_datahub.datahub_emitter import DatahubEmitter datahub_emitter = DatahubEmitter.load("MY_BLOCK_NAME") @@ -114,33 +111,16 @@ Requires an installation of Python 3.7+. We recommend using a Python virtual environment manager such as pipenv, conda or virtualenv. -These tasks are designed to work with Prefect 2.0. For more information about how to use Prefect, please refer to the [Prefect documentation](https://docs.prefect.io/). +These tasks are designed to work with Prefect 2.0.0 or higher. For more information about how to use Prefect, please refer to the [Prefect documentation](https://docs.prefect.io/). ### Feedback -If you encounter any bugs while using `prefect-datahub`, feel free to open an issue in the [prefect-datahub](https://github.com/shubhamjagtap639/prefect-datahub) repository. +If you encounter any bugs while using `prefect-datahub`, feel free to open an issue in the [datahub](https://github.com/datahub-project/datahub) repository. -If you have any questions or issues while using `prefect-datahub`, you can find help in either the [Prefect Discourse forum](https://discourse.prefect.io/) or the [Prefect Slack community](https://prefect.io/slack). +If you have any questions or issues while using `prefect-datahub`, you can find help in the [Prefect Slack community](https://prefect.io/slack). -Feel free to star or watch [`prefect-datahub`](https://github.com/shubhamjagtap639/prefect-datahub) for updates too! +Feel free to star or watch [`datahub`](https://github.com/datahub-project/datahub) for updates too! 
### Contributing -If you'd like to help contribute to fix an issue or add a feature to `prefect-datahub`, please [propose changes through a pull request from a fork of the repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). - -Here are the steps: - -1. [Fork the repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo#forking-a-repository) -2. [Clone the forked repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo#cloning-your-forked-repository) -3. Install the repository and its dependencies: -``` -pip install -e ".[dev]" -``` -4. Make desired changes -5. Add tests -6. Insert an entry to [CHANGELOG.md](https://github.com/shubhamjagtap639/prefect-datahub/blob/main/CHANGELOG.md) -7. Install `pre-commit` to perform quality checks prior to commit: -``` -pre-commit install -``` -8. `git commit`, `git push`, and create a pull request +If you'd like to help contribute to fix an issue or add a feature to `prefect-datahub`, please refer to our [Contributing Guidelines](https://datahubproject.io/docs/contributing). 
diff --git a/metadata-ingestion-modules/prefect-datahub/build.gradle b/metadata-ingestion-modules/prefect-plugin/build.gradle similarity index 58% rename from metadata-ingestion-modules/prefect-datahub/build.gradle rename to metadata-ingestion-modules/prefect-plugin/build.gradle index 9502452272c1b..ced0b8da5b508 100644 --- a/metadata-ingestion-modules/prefect-datahub/build.gradle +++ b/metadata-ingestion-modules/prefect-plugin/build.gradle @@ -7,6 +7,10 @@ ext { venv_name = 'venv' } +if (!project.hasProperty("extra_pip_requirements")) { + ext.extra_pip_requirements = "" +} + def pip_install_command = "${venv_name}/bin/pip install -e ../../metadata-ingestion" task checkPythonVersion(type: Exec) { @@ -14,55 +18,63 @@ task checkPythonVersion(type: Exec) { } task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { + def sentinel_file = "${venv_name}/.venv_environment_sentinel" inputs.file file('setup.py') - outputs.dir("${venv_name}") - commandLine 'bash', '-c', "${python_executable} -m venv ${venv_name} && ${venv_name}/bin/python -m pip install --upgrade pip wheel 'setuptools>=63.0.0'" + outputs.file(sentinel_file) + commandLine 'bash', '-c', + "${python_executable} -m venv ${venv_name} &&" + + "${venv_name}/bin/python -m pip install --upgrade pip wheel 'setuptools>=63.0.0' && " + + "touch ${sentinel_file}" } -task installPackage(type: Exec, dependsOn: environmentSetup) { +task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingestion:codegen']) { + def sentinel_file = "${venv_name}/.build_install_package_sentinel" inputs.file file('setup.py') - outputs.dir("${venv_name}") - commandLine 'bash', '-x', '-c', "${pip_install_command} -e ." + outputs.file(sentinel_file) + // Workaround for https://github.com/yaml/pyyaml/issues/601. + // See https://github.com/yaml/pyyaml/issues/601#issuecomment-1638509577. + // and https://github.com/datahub-project/datahub/pull/8435. 
+ commandLine 'bash', '-x', '-c', + "${pip_install_command} install 'Cython<3.0' 'PyYAML<6' --no-build-isolation && " + + "${pip_install_command} -e . ${extra_pip_requirements} &&" + + "touch ${sentinel_file}" } task install(dependsOn: [installPackage]) task installDev(type: Exec, dependsOn: [install]) { + def sentinel_file = "${venv_name}/.build_install_dev_sentinel" inputs.file file('setup.py') - outputs.dir("${venv_name}") - outputs.file("${venv_name}/.build_install_dev_sentinel") + outputs.file("${sentinel_file}") commandLine 'bash', '-x', '-c', - "${pip_install_command} -e .[dev] && touch ${venv_name}/.build_install_dev_sentinel" + "${pip_install_command} -e .[dev] ${extra_pip_requirements} && " + + "touch ${sentinel_file}" } task lint(type: Exec, dependsOn: installDev) { - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && black --check --diff prefect_datahub/ tests/ && isort --check --diff prefect_datahub/ tests/ && flake8 --count --statistics prefect_datahub/ tests/ && mypy prefect_datahub/ tests/" + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "black --check --diff src/ tests/ && " + + "isort --check --diff src/ tests/ && " + + "flake8 --count --statistics src/ tests/ && " + + "mypy --show-traceback --show-error-codes src/ tests/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-x', '-c', "source ${venv_name}/bin/activate && " + - "black prefect_datahub/ tests/ && " + - "isort prefect_datahub/ tests/ && " + - "flake8 prefect_datahub/ tests/ && " + - "mypy prefect_datahub/ tests/ " -} - -task testQuick(type: Exec, dependsOn: installDev) { - // We can't enforce the coverage requirements if we run a subset of the tests. 
- inputs.files(project.fileTree(dir: "prefect_datahub/", include: "**/*.py")) - inputs.files(project.fileTree(dir: "tests/")) - outputs.dir("${venv_name}") - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" + "black src/ tests/ && " + + "isort src/ tests/ && " + + "flake8 src/ tests/ && " + + "mypy src/ tests/ " } task installDevTest(type: Exec, dependsOn: [installDev]) { + def sentinel_file = "${venv_name}/.build_install_dev_test_sentinel" inputs.file file('setup.py') outputs.dir("${venv_name}") - outputs.file("${venv_name}/.build_install_dev_test_sentinel") + outputs.file("${sentinel_file}") commandLine 'bash', '-x', '-c', - "${pip_install_command} -e .[dev,integration-tests] && touch ${venv_name}/.build_install_dev_test_sentinel" + "${pip_install_command} -e .[dev,integration-tests] && touch ${sentinel_file}" } def testFile = hasProperty('testFile') ? testFile : 'unknown' @@ -79,6 +91,16 @@ task testSingle(dependsOn: [installDevTest]) { } } +task testQuick(type: Exec, dependsOn: installDevTest) { + // We can't enforce the coverage requirements if we run a subset of the tests. 
+ inputs.files(project.fileTree(dir: "src/", include: "**/*.py")) + inputs.files(project.fileTree(dir: "tests/")) + outputs.dir("${venv_name}") + commandLine 'bash', '-x', '-c', + "source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" +} + + task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) { commandLine 'bash', '-x', '-c', "source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml" diff --git a/metadata-ingestion-modules/prefect-datahub/docs/concept_mapping.md b/metadata-ingestion-modules/prefect-plugin/docs/concept_mapping.md similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/docs/concept_mapping.md rename to metadata-ingestion-modules/prefect-plugin/docs/concept_mapping.md diff --git a/metadata-ingestion-modules/prefect-datahub/docs/datahub_emitter.md b/metadata-ingestion-modules/prefect-plugin/docs/datahub_emitter.md similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/docs/datahub_emitter.md rename to metadata-ingestion-modules/prefect-plugin/docs/datahub_emitter.md diff --git a/metadata-ingestion-modules/prefect-datahub/docs/gen_blocks_catalog.py b/metadata-ingestion-modules/prefect-plugin/docs/gen_blocks_catalog.py similarity index 95% rename from metadata-ingestion-modules/prefect-datahub/docs/gen_blocks_catalog.py rename to metadata-ingestion-modules/prefect-plugin/docs/gen_blocks_catalog.py index 7e406129028d1..b7be4c9a75fcc 100644 --- a/metadata-ingestion-modules/prefect-datahub/docs/gen_blocks_catalog.py +++ b/metadata-ingestion-modules/prefect-plugin/docs/gen_blocks_catalog.py @@ -43,8 +43,7 @@ def insert_blocks_catalog(generated_file): To register blocks in this module to [view and edit them](https://docs.prefect.io/ui/blocks/) - on Prefect Cloud, first [install the required packages]( - https://shubhamjagtap639.github.io/prefect-datahub/#installation), + on 
Prefect Cloud, first install the required packages, then ```bash prefect block register -m {COLLECTION_SLUG} diff --git a/metadata-ingestion-modules/prefect-datahub/docs/gen_examples_catalog.py b/metadata-ingestion-modules/prefect-plugin/docs/gen_examples_catalog.py similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/docs/gen_examples_catalog.py rename to metadata-ingestion-modules/prefect-plugin/docs/gen_examples_catalog.py diff --git a/metadata-ingestion-modules/prefect-datahub/docs/gen_home_page.py b/metadata-ingestion-modules/prefect-plugin/docs/gen_home_page.py similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/docs/gen_home_page.py rename to metadata-ingestion-modules/prefect-plugin/docs/gen_home_page.py diff --git a/metadata-ingestion-modules/prefect-datahub/docs/img/favicon.ico b/metadata-ingestion-modules/prefect-plugin/docs/img/favicon.ico similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/docs/img/favicon.ico rename to metadata-ingestion-modules/prefect-plugin/docs/img/favicon.ico diff --git a/metadata-ingestion-modules/prefect-datahub/docs/img/prefect-logo-mark-solid-white-500.png b/metadata-ingestion-modules/prefect-plugin/docs/img/prefect-logo-mark-solid-white-500.png similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/docs/img/prefect-logo-mark-solid-white-500.png rename to metadata-ingestion-modules/prefect-plugin/docs/img/prefect-logo-mark-solid-white-500.png diff --git a/metadata-ingestion-modules/prefect-datahub/docs/img/prefect-logo-white.png b/metadata-ingestion-modules/prefect-plugin/docs/img/prefect-logo-white.png similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/docs/img/prefect-logo-white.png rename to metadata-ingestion-modules/prefect-plugin/docs/img/prefect-logo-white.png diff --git a/metadata-ingestion-modules/prefect-datahub/docs/overrides/partials/integrations/analytics/custom.html 
b/metadata-ingestion-modules/prefect-plugin/docs/overrides/partials/integrations/analytics/custom.html similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/docs/overrides/partials/integrations/analytics/custom.html rename to metadata-ingestion-modules/prefect-plugin/docs/overrides/partials/integrations/analytics/custom.html diff --git a/metadata-ingestion-modules/prefect-datahub/docs/stylesheets/extra.css b/metadata-ingestion-modules/prefect-plugin/docs/stylesheets/extra.css similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/docs/stylesheets/extra.css rename to metadata-ingestion-modules/prefect-plugin/docs/stylesheets/extra.css diff --git a/metadata-ingestion-modules/prefect-datahub/mkdocs.yml b/metadata-ingestion-modules/prefect-plugin/mkdocs.yml similarity index 92% rename from metadata-ingestion-modules/prefect-datahub/mkdocs.yml rename to metadata-ingestion-modules/prefect-plugin/mkdocs.yml index 968d6c0b655a9..e7ee84211fdae 100644 --- a/metadata-ingestion-modules/prefect-datahub/mkdocs.yml +++ b/metadata-ingestion-modules/prefect-plugin/mkdocs.yml @@ -1,6 +1,6 @@ site_name: prefect-datahub -site_url: https://shubhamjagtap639.github.io/prefect-datahub -repo_url: https://github.com/shubhamjagtap639/prefect-datahub +site_url: https://datahub-project.github.io/datahub +repo_url: https://github.com/datahub-project/datahub edit_uri: edit/main/docs/ theme: name: material @@ -68,7 +68,7 @@ plugins: show_signature: False heading_level: 1 watch: - - prefect_datahub/ + - src/prefect_datahub/ - README.md nav: diff --git a/metadata-ingestion-modules/prefect-datahub/pyproject.toml b/metadata-ingestion-modules/prefect-plugin/pyproject.toml similarity index 90% rename from metadata-ingestion-modules/prefect-datahub/pyproject.toml rename to metadata-ingestion-modules/prefect-plugin/pyproject.toml index 83b79e3146176..fba81486b9f67 100644 --- a/metadata-ingestion-modules/prefect-datahub/pyproject.toml +++ 
b/metadata-ingestion-modules/prefect-plugin/pyproject.toml @@ -9,7 +9,6 @@ extend-exclude = ''' ^/tmp ''' include = '\.pyi?$' -target-version = ['py36', 'py37', 'py38'] [tool.isort] indent = ' ' diff --git a/metadata-ingestion-modules/prefect-datahub/scripts/release.sh b/metadata-ingestion-modules/prefect-plugin/scripts/release.sh similarity index 63% rename from metadata-ingestion-modules/prefect-datahub/scripts/release.sh rename to metadata-ingestion-modules/prefect-plugin/scripts/release.sh index 17faff8c338e3..f01287d3e3731 100755 --- a/metadata-ingestion-modules/prefect-datahub/scripts/release.sh +++ b/metadata-ingestion-modules/prefect-plugin/scripts/release.sh @@ -10,12 +10,12 @@ fi MODULE=prefect_datahub # Check packaging constraint. -python -c 'import setuptools; where="./prefect_datahub"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' +python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' if [[ ${RELEASE_VERSION:-} ]]; then # Replace version with RELEASE_VERSION env variable - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" ${MODULE}/__init__.py + sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/${MODULE}/__init__.py else - vim ${MODULE}/__init__.py + vim src/${MODULE}/__init__.py fi rm -rf build dist || true @@ -23,4 +23,4 @@ python -m build if [[ ! 
${RELEASE_SKIP_UPLOAD:-} ]]; then python -m twine upload 'dist/*' fi -git restore ${MODULE}/__init__.py +git restore src/${MODULE}/__init__.py diff --git a/metadata-ingestion-modules/prefect-datahub/setup.cfg b/metadata-ingestion-modules/prefect-plugin/setup.cfg similarity index 88% rename from metadata-ingestion-modules/prefect-datahub/setup.cfg rename to metadata-ingestion-modules/prefect-plugin/setup.cfg index 2232f9acd13df..c59a99fa8aec0 100644 --- a/metadata-ingestion-modules/prefect-datahub/setup.cfg +++ b/metadata-ingestion-modules/prefect-plugin/setup.cfg @@ -41,9 +41,11 @@ ignore_missing_imports = no [tool:pytest] asyncio_mode = auto +addopts = --cov=src --cov-report term-missing --cov-config setup.cfg --strict-markers testpaths = - tests + tests/unit + tests/integration [coverage:run] # Because of some quirks in the way setup.cfg, coverage.py, pytest-cov, @@ -51,12 +53,12 @@ testpaths = # See https://pytest-cov.readthedocs.io/en/latest/config.html and # https://coverage.readthedocs.io/en/coverage-5.0/config.html. # We also have some additional pytest/cov config options in tox.ini. -# source = prefect_datahub +# source = src [coverage:paths] # This is necessary for tox-based coverage to be counted properly. 
source = - prefect_datahub + src */site-packages [coverage:report] @@ -67,4 +69,6 @@ exclude_lines = pragma: no cover @abstract if TYPE_CHECKING: -#omit = +omit = + # omit example jobs + src/prefect_datahub/example/* diff --git a/metadata-ingestion-modules/prefect-plugin/setup.py b/metadata-ingestion-modules/prefect-plugin/setup.py new file mode 100644 index 0000000000000..10396f5192291 --- /dev/null +++ b/metadata-ingestion-modules/prefect-plugin/setup.py @@ -0,0 +1,138 @@ +import os +import pathlib + +import setuptools + +package_metadata: dict = {} +with open("./src/prefect_datahub/__init__.py") as fp: + exec(fp.read(), package_metadata) + + +def get_long_description(): + root = os.path.dirname(__file__) + return pathlib.Path(os.path.join(root, "README.md")).read_text() + + +rest_common = {"requests", "requests_file"} + +base_requirements = { + # Actual dependencies. + "prefect >= 2.0.0", + *rest_common, + f"acryl-datahub == {package_metadata['__version__']}", +} + + +mypy_stubs = { + "types-dataclasses", + "sqlalchemy-stubs", + "types-pkg_resources", + "types-six", + "types-python-dateutil", + "types-requests", + "types-toml", + "types-PyYAML", + "types-freezegun", + "types-cachetools", + # versions 0.1.13 and 0.1.14 seem to have issues + "types-click==0.1.12", + "types-tabulate", + # avrogen package requires this + "types-pytz", +} + +base_dev_requirements = { + *base_requirements, + *mypy_stubs, + "black==22.12.0", + "coverage>=5.1", + "flake8>=3.8.3", + "flake8-tidy-imports>=4.3.0", + "isort>=5.7.0", + "mypy>=1.4.0", + # pydantic 1.8.2 is incompatible with mypy 0.910. + # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. 
+ "pydantic>=1.10", + "pytest>=6.2.2", + "pytest-asyncio>=0.16.0", + "pytest-cov>=2.8.1", + "tox", + "deepdiff", + "requests-mock", + "freezegun", + "jsonpickle", + "build", + "twine", + "packaging", + # Prefect block integration required packages + "mkdocs", + "mkdocs-material", + "mkdocstrings[python]", + "mock; python_version < '3.8'", + "mkdocs-gen-files", + "Pillow", + "flaky", +} + +dev_requirements = { + *base_dev_requirements, +} + + +entry_points = { + "prefect.block": "prefect-datahub = prefect_datahub.prefect_datahub:DatahubEmitter" +} + + +setuptools.setup( + # Package metadata. + name=package_metadata["__package_name__"], + version=package_metadata["__version__"], + url="https://datahubproject.io/", + project_urls={ + "Documentation": "https://datahubproject.io/docs/", + "Source": "https://github.com/datahub-project/datahub", + "Changelog": "https://github.com/datahub-project/datahub/releases", + }, + license="Apache License 2.0", + description="Datahub prefect block to capture executions and send to Datahub", + long_description=get_long_description(), + long_description_content_type="text/markdown", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: System Administrators", + "License :: OSI Approved", + "License :: OSI Approved :: Apache Software License", + "Operating System :: Unix", + "Operating System :: POSIX :: Linux", + "Environment :: Console", + "Environment :: MacOS X", + "Topic :: Software Development", + ], + # Package info. 
+ zip_safe=False, + python_requires=">=3.7", + package_dir={"": "src"}, + packages=setuptools.find_namespace_packages(where="./src"), + entry_points=entry_points, + # Dependencies. + install_requires=list(base_requirements), + extras_require={ + "dev": list(dev_requirements), + "datahub-kafka": [ + f"acryl-datahub[datahub-kafka] == {package_metadata['__version__']}" + ], + "integration-tests": [ + f"acryl-datahub[datahub-kafka] == {package_metadata['__version__']}", + ], + }, +) diff --git a/metadata-ingestion-modules/prefect-datahub/prefect_datahub/__init__.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py similarity index 78% rename from metadata-ingestion-modules/prefect-datahub/prefect_datahub/__init__.py rename to metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py index 3e00a07d907bc..c53a52e2cae2f 100644 --- a/metadata-ingestion-modules/prefect-datahub/prefect_datahub/__init__.py +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py @@ -1,6 +1,6 @@ # Published at https://pypi.org/project/acryl-datahub/. 
__package_name__ = "prefect-datahub" -__version__ = "0.0.0.dev1" +__version__ = "0.0.0.dev0" def is_dev_mode() -> bool: @@ -17,5 +17,5 @@ def get_provider_info(): return { "package-name": f"{__package_name__}", "name": f"{__package_name__}", - "description": "datahub emitter to emit prefect metadata", + "description": "Datahub prefect block to capture executions and send to Datahub", } diff --git a/metadata-ingestion-modules/prefect-datahub/prefect_datahub/datahub_emitter.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py similarity index 97% rename from metadata-ingestion-modules/prefect-datahub/prefect_datahub/datahub_emitter.py rename to metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py index 8ce16bd8ab763..e8f47c8f6cd16 100644 --- a/metadata-ingestion-modules/prefect-datahub/prefect_datahub/datahub_emitter.py +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py @@ -16,7 +16,6 @@ from datahub.utilities.urns.data_flow_urn import DataFlowUrn from datahub.utilities.urns.data_job_urn import DataJobUrn from datahub.utilities.urns.dataset_urn import DatasetUrn -from datahub_provider.entities import _Entity from prefect import get_run_logger from prefect.blocks.core import Block from prefect.client import cloud, orchestration @@ -24,7 +23,9 @@ from prefect.client.schemas.objects import Flow from prefect.context import FlowRunContext, TaskRunContext from prefect.settings import PREFECT_API_URL -from pydantic import Field, HttpUrl, parse_obj_as +from pydantic import Field + +from prefect_datahub.dataset import _Entity ORCHESTRATOR = "prefect" @@ -107,20 +108,11 @@ class DatahubEmitter(Block): """ _block_type_name: Optional[str] = "datahub emitter" - # replace this with a relevant logo; defaults to Prefect logo - _logo_url = parse_obj_as( - HttpUrl, "https://datahubproject.io/img/datahub-logo-color-mark.svg" - ) # noqa - _documentation_url = parse_obj_as( - HttpUrl, - 
"https://shubhamjagtap639.github.io/prefect-datahub/datahub_emitter/" - "#prefect-datahub.datahub_emitter.DatahubEmitter", - ) # noqa datahub_rest_url: str = Field( default="http://localhost:8080", title="Datahub rest url", - description="Datahub GMS Rest URL. Example: http://localhost:8080", + description="Datahub GMS Rest URL. Example: http://localhost:8080.", ) env: str = Field( @@ -555,9 +547,8 @@ def add_task( Example: Emit the task metadata as show below: ```python - from datahub_provider.entities import Dataset from prefect import flow, task - + from prefect_datahub.dataset import Dataset from prefect_datahub.datahub_emitter import DatahubEmitter datahub_emitter = DatahubEmitter.load("MY_BLOCK_NAME") @@ -604,7 +595,6 @@ def emit_flow(self) -> None: Emit the flow metadata as show below: ```python from prefect import flow, task - from prefect_datahub.datahub_emitter import DatahubEmitter datahub_emitter = DatahubEmitter.load("MY_BLOCK_NAME") diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/dataset.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/dataset.py new file mode 100644 index 0000000000000..e2711d0925d97 --- /dev/null +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/dataset.py @@ -0,0 +1,46 @@ +from abc import abstractmethod +from typing import Optional + +import attr +import datahub.emitter.mce_builder as builder +from datahub.utilities.urns.urn import guess_entity_type + + +class _Entity: + @property + @abstractmethod + def urn(self) -> str: + pass + + +@attr.s(auto_attribs=True, str=True) +class Dataset(_Entity): + platform: str + name: str + env: str = builder.DEFAULT_ENV + platform_instance: Optional[str] = None + + @property + def urn(self): + return builder.make_dataset_urn_with_platform_instance( + platform=self.platform, + name=self.name, + platform_instance=self.platform_instance, + env=self.env, + ) + + +@attr.s(str=True) +class Urn(_Entity): + _urn: str = attr.ib() + + 
@_urn.validator + def _validate_urn(self, attribute, value): + if not value.startswith("urn:"): + raise ValueError("invalid urn provided: urns must start with 'urn:'") + if guess_entity_type(value) != "dataset": + raise ValueError("Datajob input/output currently only supports datasets") + + @property + def urn(self): + return self._urn diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/__init__.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/flow.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/flow.py new file mode 100644 index 0000000000000..cc4a6fe1b20be --- /dev/null +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/flow.py @@ -0,0 +1,32 @@ +from prefect import flow, task + +from prefect_datahub.datahub_emitter import DatahubEmitter +from prefect_datahub.dataset import Dataset + +datahub_emitter = DatahubEmitter.load("datahub-block") + + +@task(name="Extract", description="Extract the data") +def extract(): + data = "This is data" + return data + + +@task(name="Transform", description="Transform the data") +def transform(data): + data = data.split(" ") + datahub_emitter.add_task( + inputs=[Dataset("snowflake", "mydb.schema.tableA")], + outputs=[Dataset("snowflake", "mydb.schema.tableC")], + ) + return data + + +@flow(name="ETL", description="Extract transform load flow") +def etl(): + data = extract() + data = transform(data) + datahub_emitter.emit_flow() + + +etl() diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/save_block.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/save_block.py new file mode 100644 index 0000000000000..52140cf9842e2 --- /dev/null +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/save_block.py 
@@ -0,0 +1,7 @@ +from prefect_datahub.datahub_emitter import DatahubEmitter + +DatahubEmitter( + datahub_rest_url="http://localhost:8080", + env="PROD", + platform_instance="local_prefect", +).save("datahub-block", overwrite=True) diff --git a/metadata-ingestion-modules/prefect-plugin/tests/integration/integration_test_dummy.py b/metadata-ingestion-modules/prefect-plugin/tests/integration/integration_test_dummy.py new file mode 100644 index 0000000000000..10cf3ad0a608a --- /dev/null +++ b/metadata-ingestion-modules/prefect-plugin/tests/integration/integration_test_dummy.py @@ -0,0 +1,2 @@ +def test_dummy(): + pass diff --git a/metadata-ingestion-modules/prefect-datahub/tests/conftest.py b/metadata-ingestion-modules/prefect-plugin/tests/unit/conftest.py similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/tests/conftest.py rename to metadata-ingestion-modules/prefect-plugin/tests/unit/conftest.py diff --git a/metadata-ingestion-modules/prefect-plugin/tests/unit/test_block_standards.py b/metadata-ingestion-modules/prefect-plugin/tests/unit/test_block_standards.py new file mode 100644 index 0000000000000..76794bc0fb27a --- /dev/null +++ b/metadata-ingestion-modules/prefect-plugin/tests/unit/test_block_standards.py @@ -0,0 +1,45 @@ +import re +from typing import Type + +import pytest +from prefect.blocks.core import Block + +from prefect_datahub.datahub_emitter import DatahubEmitter + + +@pytest.mark.parametrize("block", [DatahubEmitter]) +class TestAllBlocksAdhereToStandards: + @pytest.fixture + def block(self, block): + return block + + def test_has_a_description(self, block: Type[Block]) -> None: + assert block.get_description() + + def test_all_fields_have_a_description(self, block: Type[Block]) -> None: + for name, field in block.__fields__.items(): + if Block.is_block_class(field.type_): + # TODO: Block field descriptions aren't currently handled by the UI, so block + # fields are currently excluded from this test. 
Once block field descriptions are + # supported by the UI, remove this clause. + continue + assert ( + field.field_info.description + ), f"{block.__name__} is missing a description on {name}" + assert field.field_info.description.endswith( + "." + ), f"{name} description on {block.__name__} does not end with a period" + + def test_has_a_valid_code_example(self, block: Type[Block]) -> None: + code_example = block.get_code_example() + assert code_example is not None, f"{block.__name__} is missing a code example" + import_pattern = rf"from .* import {block.__name__}" + assert re.search(import_pattern, code_example) is not None, ( + f"The code example for {block.__name__} is missing an import statement" + f" matching the pattern {import_pattern}" + ) + block_load_pattern = rf'.* = {block.__name__}\.load\("BLOCK_NAME"\)' + assert re.search(block_load_pattern, code_example), ( + f"The code example for {block.__name__} is missing a .load statement" + f" matching the pattern {block_load_pattern}" + ) diff --git a/metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py b/metadata-ingestion-modules/prefect-plugin/tests/unit/test_datahub_emitter.py similarity index 99% rename from metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py rename to metadata-ingestion-modules/prefect-plugin/tests/unit/test_datahub_emitter.py index e4499f3215b9a..1f03132b12210 100644 --- a/metadata-ingestion-modules/prefect-datahub/tests/test_datahub_emitter.py +++ b/metadata-ingestion-modules/prefect-plugin/tests/unit/test_datahub_emitter.py @@ -4,9 +4,9 @@ from datahub.api.entities.datajob import DataJob from datahub.utilities.urns.dataset_urn import DatasetUrn -from datahub_provider.entities import Dataset, _Entity from prefect_datahub.datahub_emitter import DatahubEmitter +from prefect_datahub.dataset import Dataset, _Entity @patch("prefect_datahub.datahub_emitter.DatahubRestEmitter", autospec=True) diff --git 
a/metadata-ingestion-modules/prefect-datahub/tox.ini b/metadata-ingestion-modules/prefect-plugin/tox.ini similarity index 100% rename from metadata-ingestion-modules/prefect-datahub/tox.ini rename to metadata-ingestion-modules/prefect-plugin/tox.ini diff --git a/metadata-ingestion/developing.md b/metadata-ingestion/developing.md index f529590e2ab39..0eef5c23fbdc8 100644 --- a/metadata-ingestion/developing.md +++ b/metadata-ingestion/developing.md @@ -36,6 +36,16 @@ cd metadata-ingestion-modules/airflow-plugin source venv/bin/activate datahub version # should print "DataHub CLI version: unavailable (installed in develop mode)" ``` +### (Optional) Set up your Python environment for developing on Prefect Plugin + +From the repository root: + +```shell +cd metadata-ingestion-modules/prefect-plugin +../../gradlew :metadata-ingestion-modules:prefect-plugin:installDev +source venv/bin/activate +datahub version # should print "DataHub CLI version: unavailable (installed in develop mode)" +``` ### Common setup issues Common issues (click to expand): diff --git a/settings.gradle b/settings.gradle index 5ca2cad30ef12..8b3a8eb699cd9 100644 --- a/settings.gradle +++ b/settings.gradle @@ -56,7 +56,7 @@ include 'metadata-integration:java:datahub-client' include 'metadata-integration:java:datahub-protobuf' include 'ingestion-scheduler' include 'metadata-ingestion-modules:airflow-plugin' -include 'metadata-ingestion-modules:prefect-datahub' +include 'metadata-ingestion-modules:prefect-plugin' include 'smoke-test' include 'metadata-auth:auth-api' include 'metadata-service:schema-registry-api' From e4299d4458b7a1901b0c13f4a7c3ba85054fe16a Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Wed, 13 Sep 2023 15:31:31 +0530 Subject: [PATCH 17/21] Add epoch 1 for dev build versions --- .../prefect-plugin/src/prefect_datahub/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py 
b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py index c53a52e2cae2f..8cc65f9010613 100644 --- a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py @@ -1,6 +1,6 @@ # Published at https://pypi.org/project/acryl-datahub/. __package_name__ = "prefect-datahub" -__version__ = "0.0.0.dev0" +__version__ = "1!0.0.0.dev0" def is_dev_mode() -> bool: From cfde3ae4b0385c3abaaad48b5e63014fce778be7 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Wed, 13 Sep 2023 16:16:05 +0530 Subject: [PATCH 18/21] build error fixed --- metadata-ingestion-modules/prefect-plugin/setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metadata-ingestion-modules/prefect-plugin/setup.py b/metadata-ingestion-modules/prefect-plugin/setup.py index 10396f5192291..9e402ace205d0 100644 --- a/metadata-ingestion-modules/prefect-plugin/setup.py +++ b/metadata-ingestion-modules/prefect-plugin/setup.py @@ -16,6 +16,8 @@ def get_long_description(): rest_common = {"requests", "requests_file"} base_requirements = { + # For python 3.7 and importlib-metadata>=5.0.0, build failed with attribute error + "importlib-metadata>=4.4.0,<5.0.0; python_version < '3.8'" # Actual dependencies. 
"prefect >= 2.0.0", *rest_common, From f963b55389d154ac1bc143701e80cc411a1c8d27 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Wed, 13 Sep 2023 16:26:41 +0530 Subject: [PATCH 19/21] syntax error resolved --- metadata-ingestion-modules/prefect-plugin/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion-modules/prefect-plugin/setup.py b/metadata-ingestion-modules/prefect-plugin/setup.py index 9e402ace205d0..40b10e099b02e 100644 --- a/metadata-ingestion-modules/prefect-plugin/setup.py +++ b/metadata-ingestion-modules/prefect-plugin/setup.py @@ -17,7 +17,7 @@ def get_long_description(): base_requirements = { # For python 3.7 and importlib-metadata>=5.0.0, build failed with attribute error - "importlib-metadata>=4.4.0,<5.0.0; python_version < '3.8'" + "importlib-metadata>=4.4.0,<5.0.0; python_version < '3.8'", # Actual dependencies. "prefect >= 2.0.0", *rest_common, From eefb5763656429f300e4196302646859c2f34ef7 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Fri, 9 Feb 2024 01:14:56 +0530 Subject: [PATCH 20/21] Address review comments --- docs/lineage/prefect.md | 94 +++- .../prefect-plugin/docs/concept_mapping.md | 12 - .../prefect-plugin/docs/datahub_emitter.md | 2 - .../prefect-plugin/docs/gen_blocks_catalog.py | 102 ---- .../docs/gen_examples_catalog.py | 120 ----- .../prefect-plugin/docs/gen_home_page.py | 21 - .../prefect-plugin/docs/img/favicon.ico | Bin 15406 -> 0 bytes .../img/prefect-logo-mark-solid-white-500.png | Bin 16294 -> 0 bytes .../docs/img/prefect-logo-white.png | Bin 2214 -> 0 bytes .../integrations/analytics/custom.html | 16 - .../prefect-plugin/docs/stylesheets/extra.css | 114 ---- .../prefect-plugin/mkdocs.yml | 81 --- .../prefect-plugin/setup.py | 21 +- .../src/prefect_datahub/datahub_emitter.py | 2 +- .../{dataset.py => entities.py} | 0 .../src/prefect_datahub/example/flow.py | 2 +- .../prefect-plugin/tests/unit/conftest.py | 496 ----------------- .../tests/unit/test_datahub_emitter.py | 498 
+++++++++++++++++- .../prefect-plugin/tox.ini | 35 -- 19 files changed, 588 insertions(+), 1028 deletions(-) delete mode 100644 metadata-ingestion-modules/prefect-plugin/docs/concept_mapping.md delete mode 100644 metadata-ingestion-modules/prefect-plugin/docs/datahub_emitter.md delete mode 100644 metadata-ingestion-modules/prefect-plugin/docs/gen_blocks_catalog.py delete mode 100644 metadata-ingestion-modules/prefect-plugin/docs/gen_examples_catalog.py delete mode 100644 metadata-ingestion-modules/prefect-plugin/docs/gen_home_page.py delete mode 100644 metadata-ingestion-modules/prefect-plugin/docs/img/favicon.ico delete mode 100644 metadata-ingestion-modules/prefect-plugin/docs/img/prefect-logo-mark-solid-white-500.png delete mode 100644 metadata-ingestion-modules/prefect-plugin/docs/img/prefect-logo-white.png delete mode 100644 metadata-ingestion-modules/prefect-plugin/docs/overrides/partials/integrations/analytics/custom.html delete mode 100644 metadata-ingestion-modules/prefect-plugin/docs/stylesheets/extra.css delete mode 100644 metadata-ingestion-modules/prefect-plugin/mkdocs.yml rename metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/{dataset.py => entities.py} (100%) delete mode 100644 metadata-ingestion-modules/prefect-plugin/tests/unit/conftest.py delete mode 100644 metadata-ingestion-modules/prefect-plugin/tox.ini diff --git a/docs/lineage/prefect.md b/docs/lineage/prefect.md index 76ffa2edca9f4..1246e781142d7 100644 --- a/docs/lineage/prefect.md +++ b/docs/lineage/prefect.md @@ -8,13 +8,13 @@ DataHub supports integration of ## What is Prefect Datahub Block? -Blocks are primitive within Prefect that enable the storage of configuration and provide an interface for interacting with external systems. We integrated [prefect-datahub](https://prefecthq.github.io/prefect-datahub/) block which use [Datahub Rest](../../metadata-ingestion/sink_docs/datahub.md#datahub-rest) emitter to emit metadata events while running prefect flow. 
+Blocks are primitive within Prefect that enable the storage of configuration and provide an interface for interacting with external systems. We integrated `prefect-datahub` block which use [Datahub Rest](../../metadata-ingestion/sink_docs/datahub.md#datahub-rest) emitter to emit metadata events while running prefect flow. ## Prerequisites to use Prefect Datahub Block 1. You need to use either Prefect Cloud (recommended) or the self hosted Prefect server. -2. Refer [Cloud Quickstart](https://docs.prefect.io/2.10.13/cloud/cloud-quickstart/) to setup Prefect Cloud. -3. Refer [Host Prefect server](https://docs.prefect.io/2.10.13/host/) to setup self hosted Prefect server. +2. Refer [Cloud Quickstart](https://docs.prefect.io/latest/getting-started/quickstart/) to setup Prefect Cloud. +3. Refer [Host Prefect server](https://docs.prefect.io/latest/guides/host/) to setup self hosted Prefect server. 4. Make sure the Prefect api url is set correctly. You can check it by running below command: ```shell prefect profile inspect @@ -24,7 +24,93 @@ prefect profile inspect ## Setup -For setup details please refer [prefect-datahub](https://prefecthq.github.io/prefect-datahub/). +### Installation + +Install `prefect-datahub` with `pip`: + +```shell +pip install 'prefect-datahub' +``` + +Requires an installation of Python 3.7+. + +### Saving configurations to a block + +This is a one-time activity, where you can save the configuration on the [Prefect block document store](https://docs.prefect.io/latest/concepts/blocks/#saving-blocks). +While saving you can provide below configurations. Default value will get set if not provided while saving the configuration to block. + +Config | Type | Default | Description +--- | --- | --- | --- +datahub_rest_url | `str` | *http://localhost:8080* | DataHub GMS REST URL +env | `str` | *PROD* | The environment that all assets produced by this orchestrator belong to. 
For more detail and possible values refer [here](https://datahubproject.io/docs/graphql/enums/#fabrictype). +platform_instance | `str` | *None* | The instance of the platform that all assets produced by this recipe belong to. For more detail please refer [here](https://datahubproject.io/docs/platform-instances/). + +```python +from prefect_datahub.datahub_emitter import DatahubEmitter +DatahubEmitter( + datahub_rest_url="http://localhost:8080", + env="PROD", + platform_instance="local_prefect" +).save("BLOCK-NAME-PLACEHOLDER") +``` + +Congrats! You can now load the saved block to use your configurations in your Flow code: + +```python +from prefect_datahub.datahub_emitter import DatahubEmitter +DatahubEmitter.load("BLOCK-NAME-PLACEHOLDER") +``` + +!!! info "Registering blocks" + + Register blocks in this module to + [view and edit them](https://docs.prefect.io/ui/blocks/) + on Prefect Cloud: + + ```bash + prefect block register -m prefect_datahub + ``` + +### Load the saved block in prefect workflows + +After installing `prefect-datahub` and [saving the configution](#saving-configurations-to-a-block), you can easily use it within your prefect workflows to help you emit metadata event as show below! + +```python +from prefect import flow, task +from prefect_datahub.dataset import Dataset +from prefect_datahub.datahub_emitter import DatahubEmitter + +datahub_emitter = DatahubEmitter.load("MY_BLOCK_NAME") + +@task(name="Transform", description="Transform the data") +def transform(data): + data = data.split(" ") + datahub_emitter.add_task( + inputs=[Dataset("snowflake", "mydb.schema.tableA")], + outputs=[Dataset("snowflake", "mydb.schema.tableC")], + ) + return data + +@flow(name="ETL flow", description="Extract transform load flow") +def etl(): + data = transform("This is data") + datahub_emitter.emit_flow() +``` + +**Note**: To emit the tasks, user compulsory need to emit flow. Otherwise nothing will get emit. 
+ +## Concept mapping + +Prefect concepts are documented [here](https://docs.prefect.io/latest/concepts/), and datahub concepts are documented [here](https://datahubproject.io/docs/what-is-datahub/datahub-concepts). + +Prefect Concept | DataHub Concept +--- | --- +[Flow](https://docs.prefect.io/latest/concepts/flows/) | [DataFlow](https://datahubproject.io/docs/generated/metamodel/entities/dataflow/) +[Flow Run](https://docs.prefect.io/latest/concepts/flows/#flow-runs) | [DataProcessInstance](https://datahubproject.io/docs/generated/metamodel/entities/dataprocessinstance) +[Task](https://docs.prefect.io/latest/concepts/tasks/) | [DataJob](https://datahubproject.io/docs/generated/metamodel/entities/datajob/) +[Task Run](https://docs.prefect.io/latest/concepts/tasks/#tasks) | [DataProcessInstance](https://datahubproject.io/docs/generated/metamodel/entities/dataprocessinstance) +[Task Tag](https://docs.prefect.io/latest/concepts/tasks/#tags) | [Tag](https://datahubproject.io/docs/generated/metamodel/entities/tag/) + ## How to validate saved block and emit of metadata diff --git a/metadata-ingestion-modules/prefect-plugin/docs/concept_mapping.md b/metadata-ingestion-modules/prefect-plugin/docs/concept_mapping.md deleted file mode 100644 index b6d405596e733..0000000000000 --- a/metadata-ingestion-modules/prefect-plugin/docs/concept_mapping.md +++ /dev/null @@ -1,12 +0,0 @@ -# Prefect and Datahub concept mapping - - -Prefect concepts are documented [here](https://docs.prefect.io/latest/concepts/), and datahub concepts are documented [here](https://datahubproject.io/docs/what-is-datahub/datahub-concepts). 
- -Prefect Concept | DataHub Concept ---- | --- -[Flow](https://docs.prefect.io/2.10.13/concepts/flows/#flows) | [DataFlow](https://datahubproject.io/docs/generated/metamodel/entities/dataflow/) -[Flow Run](https://docs.prefect.io/latest/concepts/flows/#flow-runs) | [DataProcessInstance](https://datahubproject.io/docs/generated/metamodel/entities/dataprocessinstance) -[Task](https://docs.prefect.io/2.10.13/concepts/tasks/#tasks) | [DataJob](https://datahubproject.io/docs/generated/metamodel/entities/datajob/) -[Task Run](https://docs.prefect.io/latest/concepts/tasks/#tasks) | [DataProcessInstance](https://datahubproject.io/docs/generated/metamodel/entities/dataprocessinstance) -[Task Tag](https://docs.prefect.io/latest/concepts/tasks/#tags) | [Tag](https://datahubproject.io/docs/generated/metamodel/entities/tag/) diff --git a/metadata-ingestion-modules/prefect-plugin/docs/datahub_emitter.md b/metadata-ingestion-modules/prefect-plugin/docs/datahub_emitter.md deleted file mode 100644 index 407396b30c274..0000000000000 --- a/metadata-ingestion-modules/prefect-plugin/docs/datahub_emitter.md +++ /dev/null @@ -1,2 +0,0 @@ -# Datahub Emitter -::: prefect_datahub.datahub_emitter diff --git a/metadata-ingestion-modules/prefect-plugin/docs/gen_blocks_catalog.py b/metadata-ingestion-modules/prefect-plugin/docs/gen_blocks_catalog.py deleted file mode 100644 index b7be4c9a75fcc..0000000000000 --- a/metadata-ingestion-modules/prefect-plugin/docs/gen_blocks_catalog.py +++ /dev/null @@ -1,102 +0,0 @@ -""" -Discovers all blocks and generates a list of them in the docs -under the Blocks Catalog heading. 
-""" - -from pathlib import Path -from textwrap import dedent - -import mkdocs_gen_files -from prefect.blocks.core import Block -from prefect.utilities.dispatch import get_registry_for_type -from prefect.utilities.importtools import from_qualified_name, to_qualified_name - -COLLECTION_SLUG = "prefect_datahub" - - -def find_module_blocks(): - blocks = get_registry_for_type(Block) - collection_blocks = [ - block - for block in blocks.values() - if to_qualified_name(block).startswith(COLLECTION_SLUG) - ] - module_blocks = {} - for block in collection_blocks: - block_name = block.__name__ - module_nesting = tuple(to_qualified_name(block).split(".")[1:-1]) - if module_nesting not in module_blocks: - module_blocks[module_nesting] = [] - module_blocks[module_nesting].append(block_name) - return module_blocks - - -def insert_blocks_catalog(generated_file): - module_blocks = find_module_blocks() - if len(module_blocks) == 0: - return - generated_file.write( - dedent( - f""" - Below is a list of Blocks available for registration in - `prefect-datahub`. - - To register blocks in this module to - [view and edit them](https://docs.prefect.io/ui/blocks/) - on Prefect Cloud, first install the required packages, - then - ```bash - prefect block register -m {COLLECTION_SLUG} - ``` - """ # noqa - ) - ) - generated_file.write( - "Note, to use the `load` method on Blocks, you must already have a block document " # noqa - "[saved through code](https://docs.prefect.io/concepts/blocks/#saving-blocks) " # noqa - "or [saved through the UI](https://docs.prefect.io/ui/blocks/).\n" - ) - for module_nesting, block_names in module_blocks.items(): - module_path = f"{COLLECTION_SLUG}." 
+ " ".join(module_nesting) - module_title = ( - module_path.replace(COLLECTION_SLUG, "") - .lstrip(".") - .replace("_", " ") - .title() - ) - generated_file.write(f"## [{module_title} Module][{module_path}]\n") - for block_name in block_names: - block_obj = from_qualified_name(f"{module_path}.{block_name}") - block_description = block_obj.get_description() - if not block_description.endswith("."): - block_description += "." - generated_file.write( - f"[{block_name}][{module_path}.{block_name}]\n\n{block_description}\n\n" - ) - generated_file.write( - dedent( - f""" - To load the {block_name}: - ```python - from prefect import flow - from {module_path} import {block_name} - - @flow - def my_flow(): - my_block = {block_name}.load("MY_BLOCK_NAME") - - my_flow() - ``` - """ - ) - ) - generated_file.write( - f"For additional examples, check out the [{module_title} Module]" - f"(../examples_catalog/#{module_nesting[-1]}-module) " - f"under Examples Catalog.\n" - ) - - -blocks_catalog_path = Path("blocks_catalog.md") -with mkdocs_gen_files.open(blocks_catalog_path, "w") as generated_file: - insert_blocks_catalog(generated_file) diff --git a/metadata-ingestion-modules/prefect-plugin/docs/gen_examples_catalog.py b/metadata-ingestion-modules/prefect-plugin/docs/gen_examples_catalog.py deleted file mode 100644 index c8f82614e1c64..0000000000000 --- a/metadata-ingestion-modules/prefect-plugin/docs/gen_examples_catalog.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -Locates all the examples in the Collection and puts them in a single page. 
-""" - -import re -from collections import defaultdict -from inspect import getmembers, isclass, isfunction -from pathlib import Path -from pkgutil import iter_modules -from textwrap import dedent -from types import ModuleType -from typing import Callable, Set, Union - -import mkdocs_gen_files -from griffe.dataclasses import Docstring -from griffe.docstrings.dataclasses import DocstringSectionKind -from griffe.docstrings.parsers import Parser, parse -from prefect.logging.loggers import disable_logger -from prefect.utilities.importtools import load_module, to_qualified_name - -import prefect_datahub - -COLLECTION_SLUG = "prefect_datahub" - - -def skip_parsing(name: str, obj: Union[ModuleType, Callable], module_nesting: str): - """ - Skips parsing the object if it's a private object or if it's not in the - module nesting, preventing imports from other libraries from being added to the - examples catalog. - """ - try: - wrong_module = not to_qualified_name(obj).startswith(module_nesting) - except AttributeError: - wrong_module = False - return obj.__doc__ is None or name.startswith("_") or wrong_module - - -def skip_block_load_code_example(code_example: str) -> bool: - """ - Skips the code example if it's just showing how to load a Block. - """ - return re.search(r'\.load\("BLOCK_NAME"\)\s*$', code_example.rstrip("`")) - - -def get_code_examples(obj: Union[ModuleType, Callable]) -> Set[str]: - """ - Gathers all the code examples within an object. 
- """ - code_examples = set() - with disable_logger("griffe.docstrings.google"): - with disable_logger("griffe.agents.nodes"): - docstring = Docstring(obj.__doc__) - parsed_sections = parse(docstring, Parser.google) - - for section in parsed_sections: - if section.kind == DocstringSectionKind.examples: - code_example = "\n".join( - (part[1] for part in section.as_dict().get("value", [])) - ) - if not skip_block_load_code_example(code_example): - code_examples.add(code_example) - if section.kind == DocstringSectionKind.admonition: - value = section.as_dict().get("value", {}) - if value.get("annotation") == "example": - code_example = value.get("description") - if not skip_block_load_code_example(code_example): - code_examples.add(code_example) - - return code_examples - - -code_examples_grouping = defaultdict(set) -for _, module_name, ispkg in iter_modules(prefect_datahub.__path__): - - module_nesting = f"{COLLECTION_SLUG}.{module_name}" - module_obj = load_module(module_nesting) - - # find all module examples - if skip_parsing(module_name, module_obj, module_nesting): - continue - code_examples_grouping[module_name] |= get_code_examples(module_obj) - - # find all class and method examples - for class_name, class_obj in getmembers(module_obj, isclass): - if skip_parsing(class_name, class_obj, module_nesting): - continue - code_examples_grouping[module_name] |= get_code_examples(class_obj) - for method_name, method_obj in getmembers(class_obj, isfunction): - if skip_parsing(method_name, method_obj, module_nesting): - continue - code_examples_grouping[module_name] |= get_code_examples(method_obj) - - # find all function examples - for function_name, function_obj in getmembers(module_obj, callable): - if skip_parsing(function_name, function_obj, module_nesting): - continue - code_examples_grouping[module_name] |= get_code_examples(function_obj) - - -examples_catalog_path = Path("examples_catalog.md") -with mkdocs_gen_files.open(examples_catalog_path, "w") as 
generated_file: - generated_file.write( - dedent( - """ - # Examples Catalog - - Below is a list of examples for `prefect-datahub`. - """ - ) - ) - for module_name, code_examples in code_examples_grouping.items(): - if len(code_examples) == 0: - continue - module_title = module_name.replace("_", " ").title() - generated_file.write( - f"## [{module_title} Module][{COLLECTION_SLUG}.{module_name}]\n" - ) - for code_example in code_examples: - generated_file.write(code_example + "\n") diff --git a/metadata-ingestion-modules/prefect-plugin/docs/gen_home_page.py b/metadata-ingestion-modules/prefect-plugin/docs/gen_home_page.py deleted file mode 100644 index 334113414ed1f..0000000000000 --- a/metadata-ingestion-modules/prefect-plugin/docs/gen_home_page.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -Copies README.md to index.md. -""" - -from pathlib import Path - -import mkdocs_gen_files - -# Home page - -readme_path = Path("README.md") -docs_index_path = Path("index.md") - -with open(readme_path, "r") as readme: - with mkdocs_gen_files.open(docs_index_path, "w") as generated_file: - for line in readme: - if line.startswith("Visit the full docs [here]("): - continue # prevent linking to itself - generated_file.write(line) - - mkdocs_gen_files.set_edit_path(Path(docs_index_path), readme_path) diff --git a/metadata-ingestion-modules/prefect-plugin/docs/img/favicon.ico b/metadata-ingestion-modules/prefect-plugin/docs/img/favicon.ico deleted file mode 100644 index c4b421585b5f5cbbb793df9d0f0c7c09341d5989..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15406 zcmeHOX>e256~0c2jl9b%wk1pMk4}DM+RU^~NR!qfl;D)rq-h{iy3Hg_r<0_ehA^Q7 z8Xyi~Cln}wn1p>Z`%c&lW-(v`wj@fRkW6WrP9Qj7AwTK8)9+mAxss4&$(C%Vt!Cal zE$`m*opZl?&b{Yc36UV~7nzv?cdK~mYeGCMgs@t@pC2#^QHk#!ddT;ov^1M& zP6p9iDbck*V)D@TK13^_!vg3~k?N#pF?bQYr2La+5A7`=TAxR>B#S)L(ly5bafF77 zAGFtnY+rBj(7__2&CubaZ1T*>(60SRXjCq*J?Q6=w)#Q)1UhdyNi`;oc0z~Mx#XFj 
zsoffawj*F3zRHD4;|wdq_HnFm2z00g7c|e}EUjg@(3;T4XTMJJ+}_;BL-vvzZME0G z=P{?MoI&bRuMXQ(^b%})V$O+Y`~&Tc(G=S`)d+JyQ}_G7ailkIG#cKH*kk4U~_ zKV!x91TOYZ(Y$Dp=tb*tuhKH(SiH}Orfn>qrPGkVAy1L>wKD5pv@ml5Rfz0Z^c_hb zQvRlVh1S4leF&fP7x=BY88>NKN(H?w@+0Y^GtWr*TOfZO{0;lv#qiziduOD>|E4t2 z1j9^vU6?u}BV1c4e|sN=s^G6y!3LGE!9v(zHhk|?`0qcZ5WQizLx1QsC(<|&PG0bD zQvR+2Mf%INumOA~y$>6_2OCU-4JIQ7{V|#7wWJ&LqUaUezO$dD{Jn(=ZG*qv0QoC0 z2JrhdAM$5Heg))DfDOj>j-UMfkiQc#XCuY{K3~Q@<{`(wcQFQULH;;H{Nx`jQfN;h z(H7W%dAJNV;4zp78%&1{roaZ{<0!w{q|nEZ&vCm7@;P>Y03IxW4Q9ax?=TOO=<>921_{l$ER_L&aXfJHA)i(ygzFmdKQndG`@H^zY1PP~Tn^94#pns&HU zXf4CA{uw6r+BXL53wZt4>I;mdOij~{_Y;~{?zavr7dvT? z+zNyf5%xE%`Fc^KRiQH$q8iwM#~_3+P^PDA=YHt74%F9!!-WU7r~-~}g%#Ask5YfT zwT0>-{{-Y8g8V%g1AkwDn54|i&@K%OAz#N)E)?KcMT6zGrwwVLzb3n76xO{qjDbc+ zFb4a4zJUF)&lf0INB@J@(Ym~#yNdO-`f8$+HszkN>deLxV=Qhb*Urrb!2ip4i!94y9*lN3pB|?_yYGrqv|df z9+r7`n4tP6g?8ZiME8}|`HE`uTVHolek%^_KWS^ub5sR;uFBQ0pL1U>6Yfoq2yNwv zMEB)jxpG}E*VmoWUze}nc^^z`b4J}=WIVU*nIz@Xs(g2VzPeof9wYsA`FtmP;iF`; z>t>K?NR8f%hm54(4Wp1eUmz> zuP(RsZv7Q0`{A&{e>CpfM?kESU;w}nmZI1f%!gin1@DyKZ7H*^sA zH{?07*Ywb`?8{UMJ(gtjL#*xn6@V`!{*A!TdsSYeFU|JQV&mtuFntrvH1vf(h$!FO zUAjvAoAaG=ZO1tm-sf=6f%7i&((ll$)a6vs`%#QXdUs(ACs*R%0{pyBIDhc|)$4Fl&fKck;_ub-9p_Z2#2 zo`LuI%puM@a^86F^9NTtj7X5kKumr82Gz+okz@{8sO)AE9X0z zLp+b~Sx@-I0_Ko67W54KCySjjXU?^L&fDu8;#jcKFBWhOgVz&0kG&k~8Td~D|1r+@ zVhp(U!egMv0$$VUv4CqJdMuchM(UK*YqtgooOtQJm*mfBi&NGNbPjRdfH|bEC)$Vw zEfZ5b$DR;pYfHsXoBDTMo!Q@75`P`=pG3`p>n@ygXAbGHKwnP;!~*zIaLDymidtPN z)U!{DCS{n|i1Q>PsZ`|n_aE=~68~A?KMnlMp~Jwx-^Zb?h-ESsfJ4jSg97&j+*ZmH1LcOU0__=1ob*ma5hwh05SkLM6*cS_26=`ZsKcQZKItag4 z2R^5C9b-~SLqr*Pl^PE&N&L1FC!MnrorVrv`)hX|!~R&)xu&M6sKcqZ2K(x8L3JSD zXB|}Na1+;7T;Gnh4w66il4gtnQ61OKeDgRYhkUU>t|yG-dN)n2eNw1*hqi$~SRbAr z!^I-j1LF~s{EVzI*-G}H?&zemz<-K4ZU_=dr#osPyg&T+`Fl`u;-o437}M zmscu{O!$Omq`Vrdv7km%7Bv?C82D}QLz=8zN)Evfas5)p0^t8oj!2%Zam`3qFAfxH zz}yYCx$iSSgPNPF{4Vh}ZO^|=^~Kd#JH<4&MD-b&R9Ey0oiNoQ7BtHmc$iq=nw_Dd z4jK|a<3$~mno+0xVnl*^?WIKJ=BnJgYNM6U{NW59&Z5N_KaVHoa{`^nFQKCaFVmsI 
zLx=^g_55!Q%9X`AO7jV`J23Yi zAN)Ee**0`6J&X3`K0~|v)MHJj2JZ{p^D^Da2-LcPpK-fTi?#I^wBox$+wfds%craJ zT3mI-K6?k(ZsLny*F%oUR*_8G@?N0Ld9}1Y_YUTsXrW@-R;VYD(RQU+TbTP&XOPv?2R9;Lt%@k==ZEzR|s|uW--%PJ+5-w84FT zL5_0gcrZ_baqFME&i(;o!SiDsc(o#9MAN*?x;3McTz^{yo}97pc>$a&^{e0aAo~aC z;K)v+hJwe`bLMT*pM=G!+dKH)_4cR6+*Gm`4?;b$64y1@S`E+fV9frXyAHps`!yDz zE?m3=xNk+y5r4eF@8gf3m?w_H@8L|(8sK)t2Dh$9{P2h1Nzj>f9YbgCvBxiCFZ0A^ z*$#VufwLl=;CFkv#STC16iL(oo;Z9w>5xBZ4{y-#e5OA}>mStrAB4>t;%jdoeid=7_B8#8?WpJ84_?u+a=1M7efvwIpoF NR+s-j|KD$ce*-+RrwsrA diff --git a/metadata-ingestion-modules/prefect-plugin/docs/img/prefect-logo-mark-solid-white-500.png b/metadata-ingestion-modules/prefect-plugin/docs/img/prefect-logo-mark-solid-white-500.png deleted file mode 100644 index f83aa6ef6a34ee4c596bd1c7c2046a2f05cb9342..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16294 zcmeHui93{U-}jhCO)18bC1MPr#S%*PVJu}=wkQgrjY9TqO7<{Gq6|{fC=!yag=j;I z?35&z*MTDY)qN19Ds@i@vLgtHu7KVvdv|0_|Io}kKi0N zJq^D$Upjo;2mWec{jjC(g9Y!F5u!N)+~fZfEs+f zc*gfMHsIm~Pam}aP2BnjHTcYWERVyk5Ai*(iL*1cz!JQ?&tMPADaa|{w6&-U$}1|$!U$QPKu_P(0kWPx z5`P!@*E)J8L+<(t zre3aR-2(Ma`<~Ha=~z}lRd&Ck)xUK7clv)B^IvAOs8TzA^o)(Qks16%w&UbL=oI}*+^^EZ*zUu4)7d9+->mfbbgt)9wDR+@ zJbS{e#(4F%9?3tCRyN&`S3LdeZBplSkD1A{!@B#AoOOOEb#{LqbyrHB_>ItChZ0>} zc$)$j0;ej5=aw}6zfT9WeC#h_lAD4;ZkJBNCt7-$R;}uiPx$%oWYyn%TTs|!4Ps_y z<~2$*!EkPQf#UvnwRr4uyL`dw?J=(Q@2U@eAFEeP!bBiYSOgpTqaI#YE8AXRVbosB zkgn5SIKx&=>~xP4s=WSp0F662xl(V{;?rQ$Tu6a^|M%Q9bE^+w0kwOa2Xf!@qVWhM zCZbLcCHbQUR}|kf!`bM}#DuJby3@1;Uu6$(ZQLZNq01VBKt)Mn-+AuwzUb_9Hm*-7 z_!uoZingie>fe3%j^M9eH1p#aZoW$xrzZqwRR~nI-7Z#X45MhZQY&ytc(rY8tlfmU zlkt^`GmEa}L#os4{gZSXC719lE&581M9^eTfFc7CY512aya3zg5@Rai47I!Q%hk?G zWW)Ghwu$}*1<;SP^P4_*54QvJbifsvoX)~z-xoYn%h?w@zg>27@3 zm9>_ql_|YZ%x9pXZn(q0G~~?(J~aN69TFXCN51g%67uHl-y!)5q>kL0Ty~BgkvHL1 zp6x#ID{`7RKSDPC6bc5S@-IBsK*|a&2;Kj)qy3)J%o$?;mo1`1^ng-r!SJvI<4k9O z7;>B&9qut6nqjV!eF+&;)xq{7CxELwdP=yVpF?=B&Zz^)pIjHp%!?1Awn*2tExz9r 
z5j~h$EWt?qJk5rllZa?ry6h~Artx^ZYc{?^JLo(2t}|NdJp+qHqBH9eNKJGyi5XGa zy~mYSFztL1y>C}UTN=*!`pwOyu2d60VMYX=RzKrZL-HHtVv)5{g zohq;2yZcB+wD~0L3AH!M-X*cNcd>1pvN+Mc@t*jQ?^8O(;fPjgY-WPyJBu$P#c1qY zar5&&na%q!3-bA|2W$@PAQ=i(R~sCN9(>oyZ2R-T6{w?*2z+DS&Y^o*Go1?Jn!jpr zb}RZdklps3rQ=bUMLf34<-bGK z@x&C7Q*eQA@wHw46ryO{;^;K5Q=Nv+fdBhSllEk-raEok>h{= z=Z)Q)lS$@B?uOE_5uL$!=hT;=h?j$wC(hgJwW!VvCSJ`~?=_^6_t!ohD9eg@C9NzM zj6`cLVVb&Lvq%6rd$b=xUUUwx?0<#F)ixE3+!;b1kUl;=WdaH@`C;65(bxc1I?5HL zywt~6nKqDPFQ6_IoKb9A(~j0$z~p^qQ=CyA)Y}`(J0jty$mlDd<=ZHZB?Z4+ zPIg;X8)qa8D3qERXrWIZyeSCdq+QJSe3kOT!MW#7(pox0j~tI0G~^&S3LA_B zHv0VOKg{y(dwYZ<>qOUy8>nwg(FZx>)Yxk~Zj!>|FICyJ3q=0(r?QI)`-bu%Pde8g z7Fw+CcR9cZX~Oc)aIs}1*-&KDxPE-IdB@w+EZ5p{K_#hA=@J{t(F zXME6_X@@;Kx%6tp@mie3)0>qWYaV_Ycwn#H)MByx>|@Hn4qbDt$h%h%B(Jwg=7dwO zNK^0|PuCy2y+m^!iBxavPHa|*XfhO6&?L$gtF#se+f+R?-<^!mmESiaiwh@Xs_f}$ z)~be%ozr;#6p6N7j3En+l;N8k6GPh!M(!$^ICJd8tsOtGnr2m7GKNa&yBQ;OBeHJy zMdwkBL)Z>IS?jX0<7r85<`@-z$ zbc#FWRWd1yT33r{zJWhRJ1L}|GVC2+wRo*fY4f@G=1kIB0;QD{Zr*MhAHqCZ*sjQh ziNKS=JTiS~LXBcM(xM;vNe-QBHgs*u>J3V;D|I_s6l)>K*5GJS#~pP4wI_Fl%fgh^ z%4~^%Z68je2>%B`e6tzLKyo4?$)}Qc&DLkUK6iYIUv_P;=4FG`=P5-rvV^-w_0`uK zP}paN*}MJ1lN16yxy-xGF+sZ1DKA4Edxxu*4MzvkS2;AREa|@;ZNw$C-x0&b5yVE2 z$W(?#lPaT8BsWMP$8dWh<85io@}tr`{nhGrX-uHHnj;i8{Jb8#OYMs>;ggJPZOW9G zoJGl<)5=r0IS<|9TJc4K8r7B_T^4-NO}5r3-s@=Sbtl`dqgS^2cg%7?K1CqUV?5$k z&R+brqg^5-2?+?xNPz%7D(kaD{cEeYk<>-3J={{exRn{X)Yh$Wr$TNDrq#Ox~!&LyyLiP8%n! z9W{M*y#07ZxbdVB5lR}&24Y@kJyo?eF8uI-Q>EEwpCZHbI|$t@!GNC0Wy2rZi0vPi zJlh2d>_@g1b0FD~n3%-67xq-AfymELbCKb29Z1T7y^dL_2ExeDH*eG zC`Xq0WD`5vwm^NN%~n~j=r1*Sn(@_2{^rN>(NN)zP14E#!XX;4_2yYx<&nNxb!Nj$?Qf65b=wKYMZY}%=3m73$Z%b_cAF!) 
zox;N{-LMgD?h*ZE#_xJddS&U%vzfJz7o{n;UzPYb$e}^1*dV04-oP6Z;g~iFjtjf4 zI5Fs+?P6B5t7T)u*5l61t;g2a6Wsh~MfsQ&D_sao*x(S8drC;V;Ks3t!pt7HzYo>s%C}l zY>`^qM-MP1Na_wuYxjK&7mC-{JjeU?0>RCl+)Ekv76e(Q@(rpX6iy~Km{u*v_?6n3 zS-ci(Js?TUnMXMv;T4B16QJHn()Wv;coEYlwDh|>RmpEHY(>X9J~JmkC!1B`%fv#d zpwKPr9ByB9GOuuc#~=P+B^P%Yt=EUXy0ToN0D~VXXp6hh$L{RqMoTPlZ0n2tG#Sy!bulu$}>=b}54^5glJ#;v)74W{$mT+G3wL6o`oTKPNIexoiaT zkKZa@(xRD=KZyUCV*~2rASlc9@+`eLw}L!yr8sz|SAtT*Rhh)$*0^r85%@i@*&fo4 zXPIRQnkSBacV<#m*Lo+kABP=JJwAOE&+>)jEo_lwqXz~a?(vJw^3q3 zQIi*VSY41HBq~8G-X1r2pVJXW=zG@ru9G)j`V)kl_rT_dgy8c<=bgDyhW9a&p%}I~ zSA_zMVNIaz1zU4@jYM@-qivBqn=cGOM${SgW9)XHjA}0lJ{MJ#sDX~-i-SkG5 zQb$3lqv_J+P3)|YeKTHa8+0JvP$v7_prKbNXJ|i%Jx|ZviymAn8FQg5_g+LX{t~GO zrH7Ds|A1k(6hk4fg)r^)J+7rQ+;sb2_l>B#1zg-JRqC<*V6kjV@2(ij1Dzeyr~MUz z0y`KT`|pp;&o_(1w-vjK`o7x4hSoDgvsAU;qQ-)Wg#Zdsw=LrpDD<`p^Ff;NL49|k){rzAPr!70%MFjoNt~{rO~77em@;^c_)n$qNR~i+2-PL4Q*OKV0+J<(K6WUUE*!Wni@Y z%y*VUCToL%geRnop9TZC-w`sK=JqxWU$oP<&#W*-S&*g8IuLFhzN_~V%DzEV(J#JQ z7Ivur>23^MIRjVDYoT9;+8||}L0o%xJ3{Ab1X0X5#(OCA0ymh7Vv=g# zH3W(Qye_5xF}jaZYpC&<5vg@CSN^UnjKp-BBXXwP-|UU$9l->L&h)CZ>pLC2Jy0wy zdOm}T4Xt5}$kG0A)jgft4#$Z9*4Jb`-VrD5(n=G|R$q@2^1HjQGs^U7;ot$s4}}mj zUV#&EXc~+hM0o=tJ~6n^5&dj^TGF?Mu$+TmK6X~joREM11jUAKz$0>+Fznq5yMfcS zxE-HAcCIjg?rq)Ar+qfEQwYidPPWJz&I4L0>?~36W$F8lqEPRlnfws1s_BUVf?luy zfQYU^;if8wI|B+aT6NmrY2_olHn+TtLeSPSf;ZGi2Rf)+ZWjYAtTvU&-1J7$GvVsy8)XOEhQ_X`l(H}D>4 z|8!5Gfo5&P9!A;VIIH+a|G2OLlnDVr6@yjaA@Ub%Kj^sHDc|mSS4f>OEzx-OlWsQj z`_+*$Uz?6^_KjK5&bR!O0A*c?L0l#Qdl4*YV*c}DB-%Gd)0IZg31}c&oH)A&SitO~|Lym(vjhFmN!F0ydZEak&9`1@jAXngF*gZQAWD55P1`Jgy9-)2}p8@kTF6Xwh z|D#?Qed$D-KH3-h&XcYIJCP&|*lCYN)fzcnL*PiS$3|41R7Be&sg_mjGbykWfCOG7 zWbSQ!`Yt0$2dfWkHRgXFx(^(K?=jiWB|=?%KS@x{38K5B(SuQUKajtuBIDBUim%@; zgWgWgjbi@gE?ZDHanJBHc}v6o2aDU=7cV-z>r9DLxN#k?4Wbm2{(codhuY_Nz}xV5 zvT(ugP4qvO?XCHaj~8F=aTY*rL9vn@3oqD{7$F%(p@@W?GNwn8YmMl?kkdNtX(MGF zinvMCR=CBR0rO^`AIR%-KdZAI$8`*no4vLt*xKO9pT}F%zKPjrIusqJ+2(8I z#&xn2kwg&?p~(HiVsR3uNR?lCN|j~~lDtMQu1WRn`sl|DSU&x9O~1Wkl#Ycq^rXr1 
z24X2VN$D@caSoVJ@V<#dKTOwd-*TVN=ysfO-x=d9ZM)bNViY*Ax#FM`S`u1nX`zEV zPVOL*asD>_d<$AuH|JlBO=olx-mV^f`q5%p%ee|cR0c9w4KK&8wqsOriGiI_Q}O3w zJ2m%`bLMLlj&mWoA(nEZh*J1$Gd$0fQrD-hfTDz|nu!;~JDs^o01oNNp5B4Nfv_aD zZZX}H7NO+Lbi8>|x$EN7gb<;2Rvh#cI@BMHmjnSx60?QQ@Y-GWsMhMbcZspM1qH2Sn}yes(?_H$udi^@J6Yzsjs4C9{uBWMFd-sD1R07xB*6^2IY>?E zTP*cTy`sii!5aHq4rKuG&M@M=lsE=GCt{!VTO`oq*Q+F1=OT0Rtc8Xx*+D=q>ss9o zh!Z>7=FzCz+;*1Ky(cklT{VxwYV~Pkjp3YtLJgHG3Y|`$fzi)DAI%bYM|RK1NL6m@ zV{#S;Lxuy(-nK1sLMPh*k<)SN4|pObxgmv2dtsI(5b8$ktncf~AWZkfzi0}E6!UK@ z-OJ`q4I}Uvhdp;Vzey$FGO_cnP-Sg&NI^&DHb3ZTEi}Lzmc^t>K`};O#Ym+!6rvm_9X3>T$qc75zo-Ng!&0d#->Fb$5v=s`Y z7L7y&VJ^KGYnOcZB>H=MYu^+~bM;EQO1_p;s8%86QEnXc<(i2E8)6*}B+I?Ce{V(v z%4X2DJULd|{I#|@-DmJp;<;lq;c$Onh({dX=BpM@TbIZBusj9k2(MYnbsw72!CE7E z>;Q2x4tdj{+^P|7PcM2bYN@$Lsy06;D783Alh3QT)`Y&px>Xh-T*f#BhR4J1cp^oVhPN$xmdyrubCvr|sHkiD0Ju*YRq{?@km(`oV-T*{>% zK=Sx$N)&ND^^d)mL*G$L%qrZ&xKSHFC-(B9HCJOwF_(zR!L*IU8*4FNMx>s8%&dz8 zyf14qVq`BvyhuX;YAGu1dA*X<*{Lgev*o>5jVunkXdm-udD)TuKlM zlr}~56&ciZ-L1K|WadYp7W~Zgo=|NNg&gjq5;gcJ;owq6B-9TmCH1aIyaZqvp}cpd zL;b9ay_Pc{u3@6y`79@D*8C#!U&MpoByMATG8BJy3n8>{RIlPmn=OrCMs|_WO3RH7 zqk-N<(3U)ZlJkX!e6qs6&DiNbRZouONe#wjhzyv1Qv$i^Cu72ueSPS#qMR zxL;y^e|gtWug7JkHxIM0`~YnF!OO1W*dGbKt>>NIxKi0Tli~rO=qtatb1!s>VU~IU z@;e~)W?0<%j0qi`H1$jQSk~-Xu87kPkgsH=+v4cK(6|w;_L6TUX%xrYv3QXMZOSGq z&B4-KBVwH^?BJUJp^x<7q?rBid(YpZTU_QeVpbv#dd8Xo@|QaNHb$zaHwM^5i-|cJ z;1%7U_b48&(nhhZ`F`%2egSBs3uhywjhN=*8?PZhy|#FsgU=?Bl)7XieqP>!gxW1m zb1Q2^oXoSN<1pg*v*C)bSZ`+HJI}zkhjJXR#D;!M2>nd0IN#W^_U?yBu7_O;;?%bEtV1V|PfT=y_M1SDIvtYk9Ma-mLxRkE(%_76)#& z^HqlTzqJTC|1#k6c8HMM?XMU0G&iTyZ04tv1DJ%jTEAH^&A^udQsd+rW)lRteL`{O z%Mo!o3uw(K8OgSd;cOM@i#%IY8h*U@+}v_uyKkx_w>QL3aT9seGS;))R)?plwDAaC2{xBFMP%k3#nH%XYh~mUL*J z-uaxC3ENjJ)-fQHGQ355e$h`Xhq)`YD@apcw1qUqS??YdF^NC56M2#_%^dL>8@9@{ zjCQ&z^8LBj!&jD{8Dqnp=^EwSpQ^MI+GZ1{e1eAM1=_U;65+U+U9$Z&o+)D{!Ym=ewr2EwpHi`B+U@T7p%!N=YiP)aRI` z(PVl`YU{E{rS6E2h1jL6n|`6s?<&n;X9hYv!|-Kt-ikA9T_wBGPw-e*+SbM*#0_g( 
z=+A6_ZU5BGUvbXCAh9_&mS-EWP7A5SLMYtN-Pj#HIL?AF7}upI1fT()J|A;0L8Ey7 z^9iUMhIYhpY5NmsLZWeKO;YxYnB}aIGWGW5WKyt<{5^=VnxvOsJ`QgFm|DDv)g0#% zFgb>zAC0Kdl&maOiSp*P!=GwQ>Csm8s@~!IvEBe?VReu`X8;;9{&Q;doTBa&PHA>L zwYgq(hSz@8@BnE|^FUHU$dF)hFgk(7gb7)g&2#du-UvzmB~PHH#M-8yiTZOxo1h$* zmPp#?r@RTdfd|d1IU_852KaQ7B;RZ!sY|SJx34`OS|N@N*lvS7l_cq=ya6DDJLhGz z?g5a|i_#H2Cx8x>xq05x@C&0lkzn9ST6;FO);6?nR?L#+07U#x)X@9RYn@9bet zv;JZ^t%4h3d%`7aI*RpIH&DbZd{!;!Vw8;}bpsPWUQiSEQd}(;XMtI0V)YbNHM%+N z&clz8=vxtemA9(=OT0B}4%3-)H|4)B=2EoWhtkJSd#pr|kFoHxZ<`|%LeCY;zwcb> z`>l)gzFALQ!_B=TW%Zs;O7bdLmA6U5i;qIp$E||_kFI|)=kr&6f#w*)f<@41xw&s&v^N{m1yuD!Z(OCzR_y=?jIJNK~AXUsGTGj2x5V zm>-lfIL6da8)3fS-?_=L$m=sBB!RZt^g8SF%xE_wB~-gLsGb{35KH_`aai0PTIY1L z$tufseLsL-x=|TIOYx--$E~JrdxXE8Nxjh23{j)1Jn3Tx=YX4--pJkQ9_m-^6QOrz zP48MzOQ2>JW(zOxkZv{IhE8elbx>BYoOlfh=~rLWe)}f&6tmaQC%wpuTuI(%^s~L?d zUV2ngC~r%DGM{CAMci2Xop@|``sqx)J06m(5titA$}Kb6^4H^O1C}dS&J1TUu8OYQ z^)tdjKY=6OPha0e@)SvZiIX#+mh30QD&)z+g@>vfHxu{(BJeYPy4potjY5*gH0ZZ zdrMUd85ywWxbvC30^RT#!#0)rgBr2RnNm1Xut6nzZLqvgt--q_gl?;R-W(dh;pm8x z3mB2L-8XO2Dig>5J*+t{P^DC`q~FU#IdrQ z%)*zl-m+fSiXF#{CPAE&+DQKKvZbVja=A$-LE_sqbbq~2aov64SlZG~IQ za4<9Q+`4;C70ksen3^?NB)Si z-cN5n^p)XX+)AqKeK0m~b6}jEwTl*TDrWR#154X>NO1{fqpGv9y#pU=q~XkeTjZC!2;He0aobr(5Eu07 zaLfbk^~}ks@rtPk#%8ZFDeodfV%>Yx8)DharUk%PVHjL9;9J;4-yOdkzVtPwN`PQ9 zrh~{~Ml|eUiRC*Mqd4(LbE&rUhR-fK%9u&7P=(Sg)>{_mJ)6DN5G!)2{vt^h)Qyoz!5^b)sd$o>PE_bm#$MXQ zU2)dpEj0B_+3ydEW5n3p6K-8b+fi#c?C+FKc8!tYzj0Giu;vz&0ia}tuiqPpV|KE& z7Upd=uJeBJQPw+)YwgL{-Cf>mRi!*uZm zSX%uEld|~UQ{{T$5imy@v;aS4@joy7LhSoT|jVtbfcqpd1bA#QOWSjgSXz738(aN-!xdxJlHvLx0r#vW4AjHyl?5;zr< zkuO+-uXv6QgjN&O7?fLLw$-}y7`@dyJq>NIc3U_D{^)XJ>jY4c$I{%_?rCd^fH9S; zk~_88+#_8*TL`_&2pWU;&)Q_bqL4^a(vq}W2gMP4IsDy{AvF(wHl0;KMFA2LjeF+@QA2#Qvi@rF!LXdMcAZ{v39b)>$$J<7jk~&`sjP@x zYhjy|@3{N2_HWM=(p@+;UIG=Zm;}J_F#zK)FULimC=Lu)AbKsY7zh^s`!6l$H_|S?`y97@~bl0_i85%p% z7}~mB5dm<@A$SWkcbk5I0R~ZkWksW1tLsDTSvrOL*O(OeTakUZ~D??{8LaESugCHgASN0v2F-@)uF9jNEEis 
ztj_kr?C;~(n!_C+iD}P{vEm~}FcG0Embl9SoWcz3Y~^dopOr!3y!oK<8ln+q;l=ID z3aH75M0BL~OX9&MH~4&YGej9xqj|?^S(DJc#|FN9!!gIzS+;-?B;)7ng~L8v;D&&8 zon?;&zxWq2H(KykbmT$^mH-7x5YLogWFjJ_jzEC=8BFB_qt>R@7(;w4Y}^pji-%>> z@$&?cK$Qz%*Ud4ojF{!XzL}n0Lp`$PgHfZSU{FVY-Y>@7xgOCPUuj2OT?x2T7M2DT z!_?Q@_2Brf+4LInpL59)L-YfKY&mbP+;IE$lJ5h*5)V`PS96T0{8}s>6Cp0KsZ~be zaj1R6mlAt%wTARhJWTbz)u?7wb59vQ^hY>XX+>w9Bw(G9{DFe@wdSp@w$+x3%<zGj>tH;PC>##clIGCKEU3|^?}_cHpRB}n%>F8L%mV>T0~9sgaP>AG zyAk+_Y$PW!K`xbefY%3(s%if9jik$&(wqgPw8zNWC zNDzJYpDPtYO7NhMGUev(C4}gGzpw*I0E~w4#nl1R92xPDe8hHSBPve(2ZzqzBYa36 zhws5mjR<8(=8$i&r{Ea(7d&QtP6oQkI+#%~5lCrZ);?Q-PSz_$FfS!E?j&mzyu$h> zvfi^I%|=kbMx6&7>kA;*B3t&aPX%Q~KqZ5X@r7e<)+{*h+4y%=q!2JJgd)lPvo=Pp z>F=s|>;?##*RWsM|E`VD9Txm&LsuZi9i#W^pS7Xf*zk8l;#P2-tyd2V{IfPX)8&7e z`CrYj4CsH&?0I>$Pg{&0{fv6izVr+5YjfL`W9=%B zU)?*j0Cy}S@QJW=qUA<|8(UbD&gd+J!P)P;<1N=49&^W|8C=4Na@7(T{H?OZw$l zi!E%%-Rb4KJkys>EL@?a9sc3;B{#a8Q~_BgSrxVlj2LCz0)_rCD5z}AVb@G_-}u3quG(ypm&iI<%Azi#u*?P&^WlML&NB*CaRX-)kc8Zbu+ zdYT2pGA4)Q3^U3GJA=OpU;H@54R=IhIHz<@i6FI9fPxx~$-HnC0qU=wxslWKM+?kMu-2iMWKCy?v5>eki&UajN^4=x{HxX!Z>T}1~7v6S{PGv^8+2Vz_`I#NE> zac>C^TteZ8qf`FUwf;9dTURTg{_!9CjDt&hV;pY+POjhfONCUKdZ}0uWrM@$c>=V( zR>%TYn71Xp*Cb8jD|rSo8z(!!&=4pa=#Ol7y4&B3twyLGrnY=5FH3-i9E>jk z5agt&+uPhtpiX}gOp4CA5*BoOnuzbfVtAq4I;QMlOhbkA_bB>*pU9!h7dVbmwpIno zdnuF7H}g>*=wP*#W~-a!ZL1A%!;aL^x2d-Y*u7AC6kPg}PK1hfb#um= z*}j=e@-ySRW*(J_w2*uPZJ-Q?))uO(l0o-j5^EJx4NAA4yth;P4Ie6LBgsuCTNw(r z(wMIU`lvU7Q-AzlS2qiM9)z#&VD}(@#_y#Vd=^BTnAG98{8eTPD+CrA*iQL_i2fR_66zYn++`{_@^gV#6YVvC*4Cqu>_G z8v*-_r;pk(Bj(s%g~pQh`pe-g<%==ge63ZC`%~X}uFLo)4pY;4^PKXH>vdaDSDQnh zEL~NQs}ZQsUu|+k<2s%z`8yqo0x{#Eomy{Oe~@9A>@?PUQoo;9v;ca(Q6OftF@axR zs!eLfVUBxJzvhnc6qtT2cv0-8TXiT4L=`R6!TRwfG06@68j#QR6}@`v8%eg>kG z`l|nCicUdV_j#$s{V6 zw3esn&7tK=b+DI`$Wm0K0ey$B;^${f?iVX7sw4T!l)Td}?E?{{Xv_<+Xg7k$F=8a< zdqr&Yv$R_MY+{UcEQ5F>0VOw)9oz_csOrEWVlrvT-Th~0C83qs7k{Ei zeCG9Zi4N$^9r@Gvz_8Yr@&&NqmS%yz_};EX@iQZiNj@yp9!l(IaFeKJi|qH4sdx+o zvR`9^!K7f16*GC|NqteNw+phYC;=TN)Y?UecciBJxX}*-YS7 
z57^MlR_Zeag;%U5jNO3bL1n&becv3KaGB%Sx{qN5p|=s^_I!KSGp?73t2Lt^ja_DA z3jdAk5p{s=Ec!n2={$Cf`qPo>mD()kw`rF*SB2O4bfIxoy?a_f$UBJt?;_8Ak2|RF zDnMk>;+=2ycMzxuck`>IKxnFN_*C62!6o_$E-U_XdDgZFOyhQM({=q-RqKKHWvSTZ zJE8=GmMh=4U0e4lxH+PE|J29MTM9&35w0vt2RR#hb+aZFP&VTF0B7V+4vj&nFQwd| zJi5c3u%jOYjttiBWE}ql-q_j^VeViB)0pGumIC>#wq@*meape}DHkhT^f}KuFH1i)~hG4?d>!>Q=4c zA^&g%-c4<;5U4o4-Xj7Y9`ahcJ0znz0{?P0-39Qy$||RJCeGi@6Yg{S)2Ev28xGa# zvn&chY=E57fc&0Yp+mhLfLVAN^baU`;pG37Pq4{yP!@R7GGx7YE8Gi$zrS)XQqk{U z)&SHQ)=5?XIfwt*H)v{fjN0-t{1|Mk26HF(|Ql!&Xri z1P%7=!<&3(E&ZHrg`roK8Qnowoi0N9&lgyou~^k9v&gT%k{A)7on2KI6I}Ub7KWsUMYqL|4P*$Xxetf}(u;>0|AzXf(TuXXgm-OW!-1JzX zR-R;Il1mK`ZW@*(*{ZYz>jT22!4$Nv=?| z31|O>zQTB|ms9U2X@d2kNjNLn8OCd^%qE$S7U3XUGiy38XLCyCqd_<~1_57il0D;g zPNd)J0kVCe^41(#q*()^*Wn=BJvA-=*z@s_!vS?6RXE6YFWNT8QbtN73Rk*p{Ggj>nBt=En~M+Z{hVJ36i3lxb%yAf(a2$%cB`LEnT#>h5NV27WksKtA-FN zoOJs3eyuD&kLZ4ZYy;^Nu2%ANmXkm!p3>MZv_0yi)UJ=Q^a-aS-Btsye0oexk$vs$ zn-crkV$m*KOq1w~j^&dD{XQ5-mvBo3lZ8!W`}UKT7Wh{Qj;%<#gp;0bb)u`j?AY%o zZ}~*UEf?mZs~=AK&O++zJKgdLW~33cCEQB*&cb4%0X<}kq)Rx-lyXh0I5Y~k)OueI z4KxY25MEBvM#Y#W;gD`%GPDVYbYsq;aFyWOEFj$ka44MQje1;fApkAH)!0avffnH~ zb`ydxoYe7~rVD)3;|nKsvarIIlN@~Eq?S`W*Ip3Pg~Di&$N)ws=qY*IWCIwvTn!jU#kT4S?bK9Yn( zx+y?XN!9)w{34JvX@GDFkW^A}8wUkQ5f1651PBMRB1Je2&-yAJgMQET*q-B4_74F`Bb}4 zO&!0VnEmBq+iiNRXs_wU-N{qUr%5<&Wfj6rhcDWBjt@L6sfJ;~VObtR@mqvu5zWyi9H(*Ip5Z?LDfps&)44GRO=CDi z1NBccF9PrBB-#J2jWJJWJQis*_M&m1O*k{7a69_(Qx?5?VO&tjWG9-VPq>`y=tMOw zgS7Tnf&UoPA~Yut?(XioKK_)-*_3pN=F@qgg(5~YM_0lbO1Y_o3l+_Ag+0~(5ht1> zM7ZmO_h`E%(LRjWLD&^8Ty!rQ2l|9F^cTI2Jl0#1>_f!Pflb4gk;fsC$vQXYU~1t~ zh~^(hNvIOd(Kl&u44pir6wRlEiGxbf93cs3C|yE?yC%tY6K%TN18@#vHFep$R4CkRof~ryT2dW$EVuDGf^&)HfBs*YK5J@<63sDGI1X6JA4xlC zk0M2LNW5)6+kL_wX75GgfJEWmF*t@Hu<1qPK z^vDURk3Qm9n*M-j{Yadp8X9>2&EPnURO63Gd-r-AyESkZnbxshlIkp6()sTP5K(ca+xCh3q|QXMJxR#o?+aX`Z( z$Wu%nu8?SX(2#I3lZT=^H|9V?!i||cWWO&OxJQFYq#oG`}x3oR;Ny^${$Q~&?~07*qoM6N<$f?w7gpa1{> diff --git 
a/metadata-ingestion-modules/prefect-plugin/docs/overrides/partials/integrations/analytics/custom.html b/metadata-ingestion-modules/prefect-plugin/docs/overrides/partials/integrations/analytics/custom.html deleted file mode 100644 index 96a2301be822f..0000000000000 --- a/metadata-ingestion-modules/prefect-plugin/docs/overrides/partials/integrations/analytics/custom.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - - diff --git a/metadata-ingestion-modules/prefect-plugin/docs/stylesheets/extra.css b/metadata-ingestion-modules/prefect-plugin/docs/stylesheets/extra.css deleted file mode 100644 index 11a020958ecd8..0000000000000 --- a/metadata-ingestion-modules/prefect-plugin/docs/stylesheets/extra.css +++ /dev/null @@ -1,114 +0,0 @@ -/* theme */ -:root > * { - /* theme */ - --md-primary-fg-color: #115AF4; - --md-primary-fg-color--light: #115AF4; - --md-primary-fg-color--dark: #115AF4; -} - -/* Table formatting */ -.md-typeset table:not([class]) td { - padding: 0.5em 1.25em; -} -.md-typeset table:not([class]) th { - padding: 0.5em 1.25em; -} - -/* convenience class to keep lines from breaking -useful for wrapping table cell text in a span -to force column width */ -.no-wrap { - white-space: nowrap; -} - -/* badge formatting */ -.badge::before { - background-color: #1860F2; - color: white; - font-size: 0.8rem; - font-weight: normal; - padding: 4px 8px; - margin-left: 0.5rem; - vertical-align: super; - text-align: center; - border-radius: 5px; -} - -.badge-api::before { - background-color: #1860F2; - color: white; - font-size: 0.8rem; - font-weight: normal; - padding: 4px 8px; - text-align: center; - border-radius: 5px; -} - -.experimental::before { - background-color: #FCD14E; - content: "Experimental"; -} - -.cloud::before { - background-color: #799AF7; - content: "Prefect Cloud"; -} - -.deprecated::before { - background-color: #FA1C2F; - content: "Deprecated"; -} - -.new::before { - background-color: #2AC769; - content: "New"; -} - -.expert::before { - background-color: 
#726576; - content: "Advanced"; -} - -/* dark mode slate theme */ -/* dark mode code overrides */ -[data-md-color-scheme="slate"] { - --md-code-bg-color: #252a33; - --md-code-fg-color: #eee; - --md-code-hl-color: #3b3d54; - --md-code-hl-name-color: #eee; -} - -/* dark mode link overrides */ -[data-md-color-scheme="slate"] .md-typeset a { - color: var(--blue); -} - -[data-md-color-scheme="slate"] .md-typeset a:hover { - font-weight: bold; -} - -/* dark mode nav overrides */ -[data-md-color-scheme="slate"] .md-nav--primary .md-nav__item--active>.md-nav__link { - color: var(--blue); - font-weight: bold; -} - -[data-md-color-scheme="slate"] .md-nav--primary .md-nav__link--active { - color: var(--blue); - font-weight: bold; -} - -/* dark mode collection catalog overrides */ -[data-md-color-scheme="slate"] .collection-item { - background-color: #3b3d54; -} - -/* dark mode recipe collection overrides */ -[data-md-color-scheme="slate"] .recipe-item { - background-color: #3b3d54; -} - -/* dark mode API doc overrides */ -[data-md-color-scheme="slate"] .prefect-table th { - background-color: #3b3d54; -} \ No newline at end of file diff --git a/metadata-ingestion-modules/prefect-plugin/mkdocs.yml b/metadata-ingestion-modules/prefect-plugin/mkdocs.yml deleted file mode 100644 index e7ee84211fdae..0000000000000 --- a/metadata-ingestion-modules/prefect-plugin/mkdocs.yml +++ /dev/null @@ -1,81 +0,0 @@ -site_name: prefect-datahub -site_url: https://datahub-project.github.io/datahub -repo_url: https://github.com/datahub-project/datahub -edit_uri: edit/main/docs/ -theme: - name: material - custom_dir: docs/overrides - favicon: img/favicon.ico - palette: - - media: "(prefers-color-scheme)" - toggle: - icon: material/brightness-auto - name: Switch to light mode - - media: "(prefers-color-scheme: light)" - accent: blue - primary: blue - scheme: default - toggle: - icon: material/weather-sunny - name: Switch to dark mode - - media: "(prefers-color-scheme: dark)" - accent: blue - primary: 
blue - scheme: slate - toggle: - icon: material/weather-night - name: Switch to light mode - icon: - repo: fontawesome/brands/github - logo: - img/prefect-logo-mark-solid-white-500.png - font: - text: Inter - code: Source Code Pro - features: - - content.code.copy - - content.code.annotate -extra_css: - - stylesheets/extra.css -markdown_extensions: - - admonition - - attr_list - - codehilite - - md_in_html - - meta - - pymdownx.highlight: - use_pygments: true - - pymdownx.superfences - - pymdownx.tabbed - - pymdownx.inlinehilite - - pymdownx.snippets - -plugins: - - search - - gen-files: - scripts: - - docs/gen_home_page.py - - docs/gen_examples_catalog.py - - docs/gen_blocks_catalog.py - - mkdocstrings: - handlers: - python: - options: - show_root_heading: True - show_object_full_path: False - show_category_heading: True - show_bases: True - show_signature: False - heading_level: 1 -watch: - - src/prefect_datahub/ - - README.md - -nav: - - Home: index.md - - Datahub Emitter: datahub_emitter.md - - Blocks Catalog: blocks_catalog.md - - Examples Catalog: examples_catalog.md - - Concept Mapping: concept_mapping.md - - diff --git a/metadata-ingestion-modules/prefect-plugin/setup.py b/metadata-ingestion-modules/prefect-plugin/setup.py index 40b10e099b02e..530d0e24b2cb1 100644 --- a/metadata-ingestion-modules/prefect-plugin/setup.py +++ b/metadata-ingestion-modules/prefect-plugin/setup.py @@ -43,7 +43,7 @@ def get_long_description(): "types-pytz", } -base_dev_requirements = { +dev_requirements = { *base_requirements, *mypy_stubs, "black==22.12.0", @@ -66,21 +66,8 @@ def get_long_description(): "build", "twine", "packaging", - # Prefect block integration required packages - "mkdocs", - "mkdocs-material", - "mkdocstrings[python]", - "mock; python_version < '3.8'", - "mkdocs-gen-files", - "Pillow", - "flaky", } -dev_requirements = { - *base_dev_requirements, -} - - entry_points = { "prefect.block": "prefect-datahub = prefect_datahub.prefect_datahub:DatahubEmitter" } @@ 
-130,11 +117,5 @@ def get_long_description(): install_requires=list(base_requirements), extras_require={ "dev": list(dev_requirements), - "datahub-kafka": [ - f"acryl-datahub[datahub-kafka] == {package_metadata['__version__']}" - ], - "integration-tests": [ - f"acryl-datahub[datahub-kafka] == {package_metadata['__version__']}", - ], }, ) diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py index e8f47c8f6cd16..51b6f7c74fd07 100644 --- a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py @@ -25,7 +25,7 @@ from prefect.settings import PREFECT_API_URL from pydantic import Field -from prefect_datahub.dataset import _Entity +from prefect_datahub.entities import _Entity ORCHESTRATOR = "prefect" diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/dataset.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/entities.py similarity index 100% rename from metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/dataset.py rename to metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/entities.py diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/flow.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/flow.py index cc4a6fe1b20be..d7ea7104f25ed 100644 --- a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/flow.py +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/example/flow.py @@ -1,7 +1,7 @@ from prefect import flow, task from prefect_datahub.datahub_emitter import DatahubEmitter -from prefect_datahub.dataset import Dataset +from prefect_datahub.entities import Dataset datahub_emitter = DatahubEmitter.load("datahub-block") diff --git 
a/metadata-ingestion-modules/prefect-plugin/tests/unit/conftest.py b/metadata-ingestion-modules/prefect-plugin/tests/unit/conftest.py deleted file mode 100644 index e22c46f043098..0000000000000 --- a/metadata-ingestion-modules/prefect-plugin/tests/unit/conftest.py +++ /dev/null @@ -1,496 +0,0 @@ -import asyncio -import json -import logging -from typing import Dict, List, cast -from unittest.mock import MagicMock, patch -from uuid import UUID - -import pytest -from prefect.client.schemas import FlowRun, TaskRun, Workspace -from prefect.futures import PrefectFuture -from prefect.server.schemas.core import Flow -from prefect.task_runners import SequentialTaskRunner -from requests.models import Response - -mock_transform_task_json: Dict = { - "name": "transform", - "description": "Transform the actual data", - "task_key": "__main__.transform", - "tags": ["etl flow task"], -} -mock_extract_task_run_json: Dict = { - "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", - "created": "2023-06-06T05:51:54.822707+00:00", - "updated": "2023-06-06T05:51:55.126000+00:00", - "name": "Extract-0", - "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "task_key": "__main__.extract", - "dynamic_key": "0", - "cache_key": None, - "cache_expiration": None, - "task_version": None, - "empirical_policy": { - "max_retries": 0, - "retry_delay_seconds": 0.0, - "retries": 0, - "retry_delay": 0, - "retry_jitter_factor": None, - }, - "tags": [], - "state_id": "e280decd-2cc8-4428-a70f-149bcaf95b3c", - "task_inputs": {}, - "state_type": "COMPLETED", - "state_name": "Completed", - "run_count": 1, - "flow_run_run_count": 1, - "expected_start_time": "2023-06-06T05:51:54.822183+00:00", - "next_scheduled_start_time": None, - "start_time": "2023-06-06T05:51:55.016264+00:00", - "end_time": "2023-06-06T05:51:55.096534+00:00", - "total_run_time": 0.08027, - "estimated_run_time": 0.08027, - "estimated_start_time_delta": 0.194081, - "state": { - "id": "e280decd-2cc8-4428-a70f-149bcaf95b3c", - "type": 
"COMPLETED", - "name": "Completed", - "timestamp": "2023-06-06T05:51:55.096534+00:00", - "message": None, - "data": {"type": "unpersisted"}, - "state_details": { - "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "task_run_id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", - "child_flow_run_id": None, - "scheduled_time": None, - "cache_key": None, - "cache_expiration": None, - "untrackable_result": False, - "pause_timeout": None, - "pause_reschedule": False, - "pause_key": None, - "refresh_cache": None, - }, - }, -} -mock_transform_task_run_json: Dict = { - "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", - "created": "2023-06-06T05:51:55.160372+00:00", - "updated": "2023-06-06T05:51:55.358000+00:00", - "name": "transform-0", - "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "task_key": "__main__.transform", - "dynamic_key": "0", - "cache_key": None, - "cache_expiration": None, - "task_version": None, - "empirical_policy": { - "max_retries": 0, - "retry_delay_seconds": 0.0, - "retries": 0, - "retry_delay": 0, - "retry_jitter_factor": None, - }, - "tags": [], - "state_id": "971ad82e-6e5f-4691-abab-c900358e96c2", - "task_inputs": { - "actual_data": [ - {"input_type": "task_run", "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b"} - ] - }, - "state_type": "COMPLETED", - "state_name": "Completed", - "run_count": 1, - "flow_run_run_count": 1, - "expected_start_time": "2023-06-06T05:51:55.159416+00:00", - "next_scheduled_start_time": None, - "start_time": "2023-06-06T05:51:55.243159+00:00", - "end_time": "2023-06-06T05:51:55.332950+00:00", - "total_run_time": 0.089791, - "estimated_run_time": 0.089791, - "estimated_start_time_delta": 0.083743, - "state": { - "id": "971ad82e-6e5f-4691-abab-c900358e96c2", - "type": "COMPLETED", - "name": "Completed", - "timestamp": "2023-06-06T05:51:55.332950+00:00", - "message": None, - "data": {"type": "unpersisted"}, - "state_details": { - "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "task_run_id": 
"dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", - "child_flow_run_id": None, - "scheduled_time": None, - "cache_key": None, - "cache_expiration": None, - "untrackable_result": False, - "pause_timeout": None, - "pause_reschedule": False, - "pause_key": None, - "refresh_cache": None, - }, - }, -} -mock_load_task_run_json: Dict = { - "id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", - "created": "2023-06-06T05:51:55.389823+00:00", - "updated": "2023-06-06T05:51:55.566000+00:00", - "name": "Load_task-0", - "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "task_key": "__main__.load", - "dynamic_key": "0", - "cache_key": None, - "cache_expiration": None, - "task_version": None, - "empirical_policy": { - "max_retries": 0, - "retry_delay_seconds": 0.0, - "retries": 0, - "retry_delay": 0, - "retry_jitter_factor": None, - }, - "tags": [], - "state_id": "0cad13c8-84e4-4bcf-8616-c5904e10dcb4", - "task_inputs": { - "data": [ - {"input_type": "task_run", "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7"} - ] - }, - "state_type": "COMPLETED", - "state_name": "Completed", - "run_count": 1, - "flow_run_run_count": 1, - "expected_start_time": "2023-06-06T05:51:55.389075+00:00", - "next_scheduled_start_time": None, - "start_time": "2023-06-06T05:51:55.461812+00:00", - "end_time": "2023-06-06T05:51:55.535954+00:00", - "total_run_time": 0.074142, - "estimated_run_time": 0.074142, - "estimated_start_time_delta": 0.072737, - "state": { - "id": "0cad13c8-84e4-4bcf-8616-c5904e10dcb4", - "type": "COMPLETED", - "name": "Completed", - "timestamp": "2023-06-06T05:51:55.535954+00:00", - "message": None, - "data": {"type": "unpersisted"}, - "state_details": { - "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "task_run_id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", - "child_flow_run_id": None, - "scheduled_time": None, - "cache_key": None, - "cache_expiration": None, - "untrackable_result": True, - "pause_timeout": None, - "pause_reschedule": False, - "pause_key": None, - "refresh_cache": None, - 
}, - }, -} -mock_flow_json: Dict = { - "id": "cc65498f-d950-4114-8cc1-7af9e8fdf91b", - "created": "2023-06-02T12:31:10.988697+00:00", - "updated": "2023-06-02T12:31:10.988710+00:00", - "name": "etl", - "description": "Extract transform load flow", - "tags": [], -} -mock_flow_run_json: Dict = { - "id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "created": "2023-06-06T05:51:54.544266+00:00", - "updated": "2023-06-06T05:51:55.622000+00:00", - "name": "olivine-beagle", - "flow_id": "cc65498f-d950-4114-8cc1-7af9e8fdf91b", - "state_id": "ca2db325-d98f-40e7-862e-449cd0cc9a6e", - "deployment_id": None, - "work_queue_name": None, - "flow_version": "3ba54dfa31a7c9af4161aa4cd020a527", - "parameters": {}, - "idempotency_key": None, - "context": {}, - "empirical_policy": { - "max_retries": 0, - "retry_delay_seconds": 0.0, - "retries": 0, - "retry_delay": 0, - "pause_keys": [], - "resuming": False, - }, - "tags": [], - "parent_task_run_id": None, - "state_type": "COMPLETED", - "state_name": "Completed", - "run_count": 1, - "expected_start_time": "2023-06-06T05:51:54.543357+00:00", - "next_scheduled_start_time": None, - "start_time": "2023-06-06T05:51:54.750523+00:00", - "end_time": "2023-06-06T05:51:55.596446+00:00", - "total_run_time": 0.845923, - "estimated_run_time": 0.845923, - "estimated_start_time_delta": 0.207166, - "auto_scheduled": False, - "infrastructure_document_id": None, - "infrastructure_pid": None, - "created_by": None, - "work_pool_name": None, - "state": { - "id": "ca2db325-d98f-40e7-862e-449cd0cc9a6e", - "type": "COMPLETED", - "name": "Completed", - "timestamp": "2023-06-06T05:51:55.596446+00:00", - "message": "All states completed.", - "data": {"type": "unpersisted"}, - "state_details": { - "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "task_run_id": None, - "child_flow_run_id": None, - "scheduled_time": None, - "cache_key": None, - "cache_expiration": None, - "untrackable_result": False, - "pause_timeout": None, - "pause_reschedule": False, - 
"pause_key": None, - "refresh_cache": None, - }, - }, -} -mock_graph_json: List[Dict] = [ - { - "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", - "name": "Extract-0", - "upstream_dependencies": [], - "state": { - "id": "e280decd-2cc8-4428-a70f-149bcaf95b3c", - "type": "COMPLETED", - "name": "Completed", - "timestamp": "2023-06-06T05:51:55.096534+00:00", - "message": None, - "data": {"type": "unpersisted"}, - "state_details": { - "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "task_run_id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", - "child_flow_run_id": None, - "scheduled_time": None, - "cache_key": None, - "cache_expiration": None, - "untrackable_result": False, - "pause_timeout": None, - "pause_reschedule": False, - "pause_key": None, - "refresh_cache": None, - }, - }, - "expected_start_time": "2023-06-06T05:51:54.822183+00:00", - "start_time": "2023-06-06T05:51:55.016264+00:00", - "end_time": "2023-06-06T05:51:55.096534+00:00", - "total_run_time": 0.08027, - "estimated_run_time": 0.08027, - "untrackable_result": False, - }, - { - "id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", - "name": "Load_task-0", - "upstream_dependencies": [ - {"input_type": "task_run", "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7"} - ], - "state": { - "id": "0cad13c8-84e4-4bcf-8616-c5904e10dcb4", - "type": "COMPLETED", - "name": "Completed", - "timestamp": "2023-06-06T05:51:55.535954+00:00", - "message": None, - "data": {"type": "unpersisted"}, - "state_details": { - "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "task_run_id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", - "child_flow_run_id": None, - "scheduled_time": None, - "cache_key": None, - "cache_expiration": None, - "untrackable_result": True, - "pause_timeout": None, - "pause_reschedule": False, - "pause_key": None, - "refresh_cache": None, - }, - }, - "expected_start_time": "2023-06-06T05:51:55.389075+00:00", - "start_time": "2023-06-06T05:51:55.461812+00:00", - "end_time": "2023-06-06T05:51:55.535954+00:00", - 
"total_run_time": 0.074142, - "estimated_run_time": 0.074142, - "untrackable_result": True, - }, - { - "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", - "name": "transform-0", - "upstream_dependencies": [ - {"input_type": "task_run", "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b"} - ], - "state": { - "id": "971ad82e-6e5f-4691-abab-c900358e96c2", - "type": "COMPLETED", - "name": "Completed", - "timestamp": "2023-06-06T05:51:55.332950+00:00", - "message": None, - "data": {"type": "unpersisted"}, - "state_details": { - "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", - "task_run_id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", - "child_flow_run_id": None, - "scheduled_time": None, - "cache_key": None, - "cache_expiration": None, - "untrackable_result": False, - "pause_timeout": None, - "pause_reschedule": False, - "pause_key": None, - "refresh_cache": None, - }, - }, - "expected_start_time": "2023-06-06T05:51:55.159416+00:00", - "start_time": "2023-06-06T05:51:55.243159+00:00", - "end_time": "2023-06-06T05:51:55.332950+00:00", - "total_run_time": 0.089791, - "estimated_run_time": 0.089791, - "untrackable_result": False, - }, -] -mock_workspace_json: Dict = { - "account_id": "33e98cfe-ad06-4ceb-a500-c11148499f75", - "account_name": "shubhamjagtapgslabcom", - "account_handle": "shubhamjagtapgslabcom", - "workspace_id": "157eb822-1b3b-4338-ae80-98edd5d00cb9", - "workspace_name": "datahub", - "workspace_description": "", - "workspace_handle": "datahub", -} - - -async def mock_task_run_future(): - extract_prefect_future: PrefectFuture = PrefectFuture( - name=mock_extract_task_run_json["name"], - key=UUID("4552629a-ac04-4590-b286-27642292739f"), - task_runner=SequentialTaskRunner(), - ) - extract_prefect_future.task_run = cast( - None, TaskRun.parse_obj(mock_extract_task_run_json) - ) - transform_prefect_future: PrefectFuture = PrefectFuture( - name=mock_transform_task_run_json["name"], - key=UUID("40fff3e5-5ef4-4b8b-9cc8-786f91bcc656"), - task_runner=SequentialTaskRunner(), 
- ) - transform_prefect_future.task_run = cast( - None, TaskRun.parse_obj(mock_transform_task_run_json) - ) - load_prefect_future: PrefectFuture = PrefectFuture( - name=mock_load_task_run_json["name"], - key=UUID("7565f596-9eb0-4330-ba34-963e7839883e"), - task_runner=SequentialTaskRunner(), - ) - load_prefect_future.task_run = cast( - None, TaskRun.parse_obj(mock_load_task_run_json) - ) - return [extract_prefect_future, transform_prefect_future, load_prefect_future] - - -@pytest.fixture(scope="module") -def mock_run_logger(): - with patch( - "prefect_datahub.datahub_emitter.get_run_logger", - return_value=logging.getLogger(), - ) as mock_logger: - yield mock_logger - - -@pytest.fixture(scope="module") -def mock_run_context(mock_run_logger): - task_run_ctx = MagicMock() - task_run_ctx.task.task_key = mock_transform_task_json["task_key"] - task_run_ctx.task.name = mock_transform_task_json["name"] - task_run_ctx.task.description = mock_transform_task_json["description"] - task_run_ctx.task.tags = mock_transform_task_json["tags"] - - flow_run_ctx = MagicMock() - flow_run_ctx.flow.name = mock_flow_json["name"] - flow_run_ctx.flow.description = mock_flow_json["description"] - flow_run_obj = FlowRun.parse_obj(mock_flow_run_json) - flow_run_ctx.flow_run.id = flow_run_obj.id - flow_run_ctx.flow_run.name = flow_run_obj.name - flow_run_ctx.flow_run.flow_id = flow_run_obj.flow_id - flow_run_ctx.flow_run.start_time = flow_run_obj.start_time - flow_run_ctx.task_run_futures = asyncio.run(mock_task_run_future()) - - with patch( - "prefect_datahub.datahub_emitter.TaskRunContext" - ) as mock_task_run_ctx, patch( - "prefect_datahub.datahub_emitter.FlowRunContext" - ) as mock_flow_run_ctx: - mock_task_run_ctx.get.return_value = task_run_ctx - mock_flow_run_ctx.get.return_value = flow_run_ctx - yield (task_run_ctx, flow_run_ctx) - - -async def mock_task_run(*args, **kwargs): - task_run_id = str(kwargs["task_run_id"]) - if task_run_id == "fa14a52b-d271-4c41-99cb-6b42ca7c070b": - return 
TaskRun.parse_obj(mock_extract_task_run_json) - elif task_run_id == "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7": - return TaskRun.parse_obj(mock_transform_task_run_json) - elif task_run_id == "f19f83ea-316f-4781-8cbe-1d5d8719afc3": - return TaskRun.parse_obj(mock_load_task_run_json) - return None - - -async def mock_flow(*args, **kwargs): - return Flow.parse_obj(mock_flow_json) - - -async def mock_flow_run(*args, **kwargs): - return FlowRun.parse_obj(mock_flow_run_json) - - -async def mock_flow_run_graph(*args, **kwargs): - response = Response() - response.status_code = 200 - response._content = json.dumps(mock_graph_json, separators=(",", ":")).encode( - "utf-8" - ) - return response - - -async def mock_api_healthcheck(*args, **kwargs): - return None - - -async def mock_read_workspaces(*args, **kwargs): - return [Workspace.parse_obj(mock_workspace_json)] - - -@pytest.fixture(scope="module") -def mock_prefect_client(): - prefect_client_mock = MagicMock() - prefect_client_mock.read_flow.side_effect = mock_flow - prefect_client_mock.read_flow_run.side_effect = mock_flow_run - prefect_client_mock.read_task_run.side_effect = mock_task_run - prefect_client_mock._client.get.side_effect = mock_flow_run_graph - with patch("prefect_datahub.datahub_emitter.orchestration") as mock_client: - mock_client.get_client.return_value = prefect_client_mock - yield prefect_client_mock - - -@pytest.fixture(scope="module") -def mock_prefect_cloud_client(): - prefect_cloud_client_mock = MagicMock() - prefect_cloud_client_mock.api_healthcheck.side_effect = mock_api_healthcheck - prefect_cloud_client_mock.read_workspaces.side_effect = mock_read_workspaces - with patch("prefect_datahub.datahub_emitter.cloud") as mock_client, patch( - "prefect_datahub.datahub_emitter.PREFECT_API_URL.value", - return_value="https://api.prefect.cloud/api/accounts/33e98cfe-ad06-4ceb-" - "a500-c11148499f75/workspaces/157eb822-1b3b-4338-ae80-98edd5d00cb9", - ): - mock_client.get_cloud_client.return_value = 
prefect_cloud_client_mock - yield prefect_cloud_client_mock diff --git a/metadata-ingestion-modules/prefect-plugin/tests/unit/test_datahub_emitter.py b/metadata-ingestion-modules/prefect-plugin/tests/unit/test_datahub_emitter.py index 1f03132b12210..52bdd10485c3c 100644 --- a/metadata-ingestion-modules/prefect-plugin/tests/unit/test_datahub_emitter.py +++ b/metadata-ingestion-modules/prefect-plugin/tests/unit/test_datahub_emitter.py @@ -1,12 +1,504 @@ import asyncio -from typing import List, Optional -from unittest.mock import Mock, patch +import json +import logging +from typing import Dict, List, Optional, cast +from unittest.mock import MagicMock, Mock, patch +from uuid import UUID +import pytest from datahub.api.entities.datajob import DataJob from datahub.utilities.urns.dataset_urn import DatasetUrn +from prefect.client.schemas import FlowRun, TaskRun, Workspace +from prefect.futures import PrefectFuture +from prefect.server.schemas.core import Flow +from prefect.task_runners import SequentialTaskRunner +from requests.models import Response from prefect_datahub.datahub_emitter import DatahubEmitter -from prefect_datahub.dataset import Dataset, _Entity +from prefect_datahub.entities import Dataset, _Entity + +mock_transform_task_json: Dict = { + "name": "transform", + "description": "Transform the actual data", + "task_key": "__main__.transform", + "tags": ["etl flow task"], +} +mock_extract_task_run_json: Dict = { + "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", + "created": "2023-06-06T05:51:54.822707+00:00", + "updated": "2023-06-06T05:51:55.126000+00:00", + "name": "Extract-0", + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_key": "__main__.extract", + "dynamic_key": "0", + "cache_key": None, + "cache_expiration": None, + "task_version": None, + "empirical_policy": { + "max_retries": 0, + "retry_delay_seconds": 0.0, + "retries": 0, + "retry_delay": 0, + "retry_jitter_factor": None, + }, + "tags": [], + "state_id": 
"e280decd-2cc8-4428-a70f-149bcaf95b3c", + "task_inputs": {}, + "state_type": "COMPLETED", + "state_name": "Completed", + "run_count": 1, + "flow_run_run_count": 1, + "expected_start_time": "2023-06-06T05:51:54.822183+00:00", + "next_scheduled_start_time": None, + "start_time": "2023-06-06T05:51:55.016264+00:00", + "end_time": "2023-06-06T05:51:55.096534+00:00", + "total_run_time": 0.08027, + "estimated_run_time": 0.08027, + "estimated_start_time_delta": 0.194081, + "state": { + "id": "e280decd-2cc8-4428-a70f-149bcaf95b3c", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.096534+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": False, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, +} +mock_transform_task_run_json: Dict = { + "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", + "created": "2023-06-06T05:51:55.160372+00:00", + "updated": "2023-06-06T05:51:55.358000+00:00", + "name": "transform-0", + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_key": "__main__.transform", + "dynamic_key": "0", + "cache_key": None, + "cache_expiration": None, + "task_version": None, + "empirical_policy": { + "max_retries": 0, + "retry_delay_seconds": 0.0, + "retries": 0, + "retry_delay": 0, + "retry_jitter_factor": None, + }, + "tags": [], + "state_id": "971ad82e-6e5f-4691-abab-c900358e96c2", + "task_inputs": { + "actual_data": [ + {"input_type": "task_run", "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b"} + ] + }, + "state_type": "COMPLETED", + "state_name": "Completed", + "run_count": 1, + "flow_run_run_count": 1, + "expected_start_time": "2023-06-06T05:51:55.159416+00:00", + "next_scheduled_start_time": None, 
+ "start_time": "2023-06-06T05:51:55.243159+00:00", + "end_time": "2023-06-06T05:51:55.332950+00:00", + "total_run_time": 0.089791, + "estimated_run_time": 0.089791, + "estimated_start_time_delta": 0.083743, + "state": { + "id": "971ad82e-6e5f-4691-abab-c900358e96c2", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.332950+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": False, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, +} +mock_load_task_run_json: Dict = { + "id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", + "created": "2023-06-06T05:51:55.389823+00:00", + "updated": "2023-06-06T05:51:55.566000+00:00", + "name": "Load_task-0", + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_key": "__main__.load", + "dynamic_key": "0", + "cache_key": None, + "cache_expiration": None, + "task_version": None, + "empirical_policy": { + "max_retries": 0, + "retry_delay_seconds": 0.0, + "retries": 0, + "retry_delay": 0, + "retry_jitter_factor": None, + }, + "tags": [], + "state_id": "0cad13c8-84e4-4bcf-8616-c5904e10dcb4", + "task_inputs": { + "data": [ + {"input_type": "task_run", "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7"} + ] + }, + "state_type": "COMPLETED", + "state_name": "Completed", + "run_count": 1, + "flow_run_run_count": 1, + "expected_start_time": "2023-06-06T05:51:55.389075+00:00", + "next_scheduled_start_time": None, + "start_time": "2023-06-06T05:51:55.461812+00:00", + "end_time": "2023-06-06T05:51:55.535954+00:00", + "total_run_time": 0.074142, + "estimated_run_time": 0.074142, + "estimated_start_time_delta": 0.072737, + "state": { + "id": "0cad13c8-84e4-4bcf-8616-c5904e10dcb4", + 
"type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.535954+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": True, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, +} +mock_flow_json: Dict = { + "id": "cc65498f-d950-4114-8cc1-7af9e8fdf91b", + "created": "2023-06-02T12:31:10.988697+00:00", + "updated": "2023-06-02T12:31:10.988710+00:00", + "name": "etl", + "description": "Extract transform load flow", + "tags": [], +} +mock_flow_run_json: Dict = { + "id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "created": "2023-06-06T05:51:54.544266+00:00", + "updated": "2023-06-06T05:51:55.622000+00:00", + "name": "olivine-beagle", + "flow_id": "cc65498f-d950-4114-8cc1-7af9e8fdf91b", + "state_id": "ca2db325-d98f-40e7-862e-449cd0cc9a6e", + "deployment_id": None, + "work_queue_name": None, + "flow_version": "3ba54dfa31a7c9af4161aa4cd020a527", + "parameters": {}, + "idempotency_key": None, + "context": {}, + "empirical_policy": { + "max_retries": 0, + "retry_delay_seconds": 0.0, + "retries": 0, + "retry_delay": 0, + "pause_keys": [], + "resuming": False, + }, + "tags": [], + "parent_task_run_id": None, + "state_type": "COMPLETED", + "state_name": "Completed", + "run_count": 1, + "expected_start_time": "2023-06-06T05:51:54.543357+00:00", + "next_scheduled_start_time": None, + "start_time": "2023-06-06T05:51:54.750523+00:00", + "end_time": "2023-06-06T05:51:55.596446+00:00", + "total_run_time": 0.845923, + "estimated_run_time": 0.845923, + "estimated_start_time_delta": 0.207166, + "auto_scheduled": False, + "infrastructure_document_id": None, + "infrastructure_pid": None, + "created_by": None, + "work_pool_name": None, + 
"state": { + "id": "ca2db325-d98f-40e7-862e-449cd0cc9a6e", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.596446+00:00", + "message": "All states completed.", + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": None, + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": False, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, +} +mock_graph_json: List[Dict] = [ + { + "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", + "name": "Extract-0", + "upstream_dependencies": [], + "state": { + "id": "e280decd-2cc8-4428-a70f-149bcaf95b3c", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.096534+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": False, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, + "expected_start_time": "2023-06-06T05:51:54.822183+00:00", + "start_time": "2023-06-06T05:51:55.016264+00:00", + "end_time": "2023-06-06T05:51:55.096534+00:00", + "total_run_time": 0.08027, + "estimated_run_time": 0.08027, + "untrackable_result": False, + }, + { + "id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", + "name": "Load_task-0", + "upstream_dependencies": [ + {"input_type": "task_run", "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7"} + ], + "state": { + "id": "0cad13c8-84e4-4bcf-8616-c5904e10dcb4", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.535954+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + 
"flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "f19f83ea-316f-4781-8cbe-1d5d8719afc3", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": True, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, + "expected_start_time": "2023-06-06T05:51:55.389075+00:00", + "start_time": "2023-06-06T05:51:55.461812+00:00", + "end_time": "2023-06-06T05:51:55.535954+00:00", + "total_run_time": 0.074142, + "estimated_run_time": 0.074142, + "untrackable_result": True, + }, + { + "id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", + "name": "transform-0", + "upstream_dependencies": [ + {"input_type": "task_run", "id": "fa14a52b-d271-4c41-99cb-6b42ca7c070b"} + ], + "state": { + "id": "971ad82e-6e5f-4691-abab-c900358e96c2", + "type": "COMPLETED", + "name": "Completed", + "timestamp": "2023-06-06T05:51:55.332950+00:00", + "message": None, + "data": {"type": "unpersisted"}, + "state_details": { + "flow_run_id": "c3b947e5-3fa1-4b46-a2e2-58d50c938f2e", + "task_run_id": "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7", + "child_flow_run_id": None, + "scheduled_time": None, + "cache_key": None, + "cache_expiration": None, + "untrackable_result": False, + "pause_timeout": None, + "pause_reschedule": False, + "pause_key": None, + "refresh_cache": None, + }, + }, + "expected_start_time": "2023-06-06T05:51:55.159416+00:00", + "start_time": "2023-06-06T05:51:55.243159+00:00", + "end_time": "2023-06-06T05:51:55.332950+00:00", + "total_run_time": 0.089791, + "estimated_run_time": 0.089791, + "untrackable_result": False, + }, +] +mock_workspace_json: Dict = { + "account_id": "33e98cfe-ad06-4ceb-a500-c11148499f75", + "account_name": "shubhamjagtapgslabcom", + "account_handle": "shubhamjagtapgslabcom", + "workspace_id": "157eb822-1b3b-4338-ae80-98edd5d00cb9", + "workspace_name": "datahub", + "workspace_description": "", + "workspace_handle": "datahub", +} + + 
+async def mock_task_run_future(): + extract_prefect_future: PrefectFuture = PrefectFuture( + name=mock_extract_task_run_json["name"], + key=UUID("4552629a-ac04-4590-b286-27642292739f"), + task_runner=SequentialTaskRunner(), + ) + extract_prefect_future.task_run = cast( + None, TaskRun.parse_obj(mock_extract_task_run_json) + ) + transform_prefect_future: PrefectFuture = PrefectFuture( + name=mock_transform_task_run_json["name"], + key=UUID("40fff3e5-5ef4-4b8b-9cc8-786f91bcc656"), + task_runner=SequentialTaskRunner(), + ) + transform_prefect_future.task_run = cast( + None, TaskRun.parse_obj(mock_transform_task_run_json) + ) + load_prefect_future: PrefectFuture = PrefectFuture( + name=mock_load_task_run_json["name"], + key=UUID("7565f596-9eb0-4330-ba34-963e7839883e"), + task_runner=SequentialTaskRunner(), + ) + load_prefect_future.task_run = cast( + None, TaskRun.parse_obj(mock_load_task_run_json) + ) + return [extract_prefect_future, transform_prefect_future, load_prefect_future] + + +@pytest.fixture(scope="module") +def mock_run_logger(): + with patch( + "prefect_datahub.datahub_emitter.get_run_logger", + return_value=logging.getLogger(), + ) as mock_logger: + yield mock_logger + + +@pytest.fixture(scope="module") +def mock_run_context(mock_run_logger): + task_run_ctx = MagicMock() + task_run_ctx.task.task_key = mock_transform_task_json["task_key"] + task_run_ctx.task.name = mock_transform_task_json["name"] + task_run_ctx.task.description = mock_transform_task_json["description"] + task_run_ctx.task.tags = mock_transform_task_json["tags"] + + flow_run_ctx = MagicMock() + flow_run_ctx.flow.name = mock_flow_json["name"] + flow_run_ctx.flow.description = mock_flow_json["description"] + flow_run_obj = FlowRun.parse_obj(mock_flow_run_json) + flow_run_ctx.flow_run.id = flow_run_obj.id + flow_run_ctx.flow_run.name = flow_run_obj.name + flow_run_ctx.flow_run.flow_id = flow_run_obj.flow_id + flow_run_ctx.flow_run.start_time = flow_run_obj.start_time + 
flow_run_ctx.task_run_futures = asyncio.run(mock_task_run_future()) + + with patch( + "prefect_datahub.datahub_emitter.TaskRunContext" + ) as mock_task_run_ctx, patch( + "prefect_datahub.datahub_emitter.FlowRunContext" + ) as mock_flow_run_ctx: + mock_task_run_ctx.get.return_value = task_run_ctx + mock_flow_run_ctx.get.return_value = flow_run_ctx + yield (task_run_ctx, flow_run_ctx) + + +async def mock_task_run(*args, **kwargs): + task_run_id = str(kwargs["task_run_id"]) + if task_run_id == "fa14a52b-d271-4c41-99cb-6b42ca7c070b": + return TaskRun.parse_obj(mock_extract_task_run_json) + elif task_run_id == "dd15ee83-5d28-4bf1-804f-f84eab9f9fb7": + return TaskRun.parse_obj(mock_transform_task_run_json) + elif task_run_id == "f19f83ea-316f-4781-8cbe-1d5d8719afc3": + return TaskRun.parse_obj(mock_load_task_run_json) + return None + + +async def mock_flow(*args, **kwargs): + return Flow.parse_obj(mock_flow_json) + + +async def mock_flow_run(*args, **kwargs): + return FlowRun.parse_obj(mock_flow_run_json) + + +async def mock_flow_run_graph(*args, **kwargs): + response = Response() + response.status_code = 200 + response._content = json.dumps(mock_graph_json, separators=(",", ":")).encode( + "utf-8" + ) + return response + + +async def mock_api_healthcheck(*args, **kwargs): + return None + + +async def mock_read_workspaces(*args, **kwargs): + return [Workspace.parse_obj(mock_workspace_json)] + + +@pytest.fixture(scope="module") +def mock_prefect_client(): + prefect_client_mock = MagicMock() + prefect_client_mock.read_flow.side_effect = mock_flow + prefect_client_mock.read_flow_run.side_effect = mock_flow_run + prefect_client_mock.read_task_run.side_effect = mock_task_run + prefect_client_mock._client.get.side_effect = mock_flow_run_graph + with patch("prefect_datahub.datahub_emitter.orchestration") as mock_client: + mock_client.get_client.return_value = prefect_client_mock + yield prefect_client_mock + + +@pytest.fixture(scope="module") +def mock_prefect_cloud_client(): + 
prefect_cloud_client_mock = MagicMock() + prefect_cloud_client_mock.api_healthcheck.side_effect = mock_api_healthcheck + prefect_cloud_client_mock.read_workspaces.side_effect = mock_read_workspaces + with patch("prefect_datahub.datahub_emitter.cloud") as mock_client, patch( + "prefect_datahub.datahub_emitter.PREFECT_API_URL.value", + return_value="https://api.prefect.cloud/api/accounts/33e98cfe-ad06-4ceb-" + "a500-c11148499f75/workspaces/157eb822-1b3b-4338-ae80-98edd5d00cb9", + ): + mock_client.get_cloud_client.return_value = prefect_cloud_client_mock + yield prefect_cloud_client_mock @patch("prefect_datahub.datahub_emitter.DatahubRestEmitter", autospec=True) diff --git a/metadata-ingestion-modules/prefect-plugin/tox.ini b/metadata-ingestion-modules/prefect-plugin/tox.ini deleted file mode 100644 index 0b8118e2d3f1f..0000000000000 --- a/metadata-ingestion-modules/prefect-plugin/tox.ini +++ /dev/null @@ -1,35 +0,0 @@ -# tox (https://tox.readthedocs.io/) is a tool for running tests -# in multiple virtualenvs. This configuration file will run the -# test suite on all supported python versions. To use it, "pip install tox" -# and then run "tox" from this directory. - -[tox] -envlist = py3-quick,py3-full - -[gh-actions] -python = - 3.6: py3-full - 3.9: py3-full - -# Providing optional features that add dependencies from setup.py as deps here -# allows tox to recreate testenv when new dependencies are added to setup.py. -# Previous approach of using the tox global setting extras is not recommended -# as extras is only called when the testenv is created for the first time! 
-# see more here -> https://github.com/tox-dev/tox/issues/1105#issuecomment-448596282 - -[testenv] -deps = - -e ../../metadata-ingestion/[.dev] -commands = - pytest --cov={envsitepackagesdir}/datahub --cov={envsitepackagesdir}/datahub_provider \ - py3-quick: -m 'not integration and not slow_integration' --junit-xml=junit.quick.xml \ - py3-full: --cov-fail-under 65 --junit-xml=junit.full.xml \ - --continue-on-collection-errors \ - -vv - -setenv = - PREFECT_HOME = /tmp/prefect/thisshouldnotexist-{envname} - -[testenv:py3-full] -deps = - ../../metadata-ingestion/.[dev] From 021eedef16597351788717061f14bcf21a905d69 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Fri, 9 Feb 2024 18:29:10 +0530 Subject: [PATCH 21/21] Modify prefect-plugin yml file --- .github/workflows/prefect-plugin.yml | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml index 18cbd79f1156c..47bf417029330 100644 --- a/.github/workflows/prefect-plugin.yml +++ b/.github/workflows/prefect-plugin.yml @@ -10,9 +10,9 @@ on: - "metadata-models/**" pull_request: branches: - - master + - "**" paths: - - ".github/**" + - ".github/workflows/prefect-plugin.yml" - "metadata-ingestion-modules/prefect-plugin/**" - "metadata-ingestion/**" - "metadata-models/**" @@ -37,6 +37,12 @@ jobs: - python-version: "3.10" fail-fast: false steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: @@ -44,19 +50,20 @@ jobs: cache: "pip" - name: Install dependencies run: ./metadata-ingestion/scripts/install_deps.sh - - name: Install prefect package and test (extras ${{ matrix.extraPythonRequirement }}) - run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:prefect-plugin:lint 
:metadata-ingestion-modules:airflow-plugin:testQuick + - name: Install prefect package + run: ./gradlew :metadata-ingestion-modules:prefect-plugin:lint :metadata-ingestion-modules:prefect-plugin:testQuick + - name: pip freeze show list installed if: always() run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && pip freeze - uses: actions/upload-artifact@v3 - if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'prefect>=2.0.0' }} + if: ${{ always() && matrix.python-version == '3.10'}} with: name: Test Results (Prefect Plugin ${{ matrix.python-version}}) path: | **/build/reports/tests/test/** **/build/test-results/test/** **/junit.*.xml + !**/binary/** - name: Upload coverage to Codecov if: always() uses: codecov/codecov-action@v3 @@ -64,8 +71,8 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} directory: . fail_ci_if_error: false - flags: prefect-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }} - name: pytest-prefect + flags: prefect,prefect-${{ matrix.python-version }} + name: pytest-prefect-${{ matrix.python-version }} verbose: true event-file: