From 46b51b78a49bb8a50843c2418a49065092ab3219 Mon Sep 17 00:00:00 2001
From: Dominik Jagoda <114922270+djagoda881@users.noreply.github.com>
Date: Thu, 22 Aug 2024 11:15:23 +0200
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20orchestration=20module=20(#91?=
 =?UTF-8?q?7)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* πŸ› Fixed bug in `viadot-lite.Dockerfile`
* πŸ”– Upgraded version to `2.0.0-alpha.1`
* πŸ‘· Updated `docker-publish.yml`
* 🚚 Moved `orchestration` folder into `src/viadot`
* 🚚 Renamed path from `prefect-viadot-test` to `prefect-test`
* πŸ”– Bumped version to `2.0.0-alpha.2`
* ♻️ Synchronized `prefect-viadot` with `orchestration/prefect`
* πŸ› Fixed import in `test_git.py`
* 🧱 Updated `docker-compose.yml`
* 🚚 Moved `prefect_viadot` to `src/viadot/orchestration`
* 🚚 Changed imports in prefect-viadot
* ⬆️ Added prefect-viadot dependencies to viadot
* ⬆️ Upgraded `prefect` dependency
* πŸ”§ Updated `Dockerfile`
* ⬆️ Upgraded dependencies
* πŸ”₯ Deprecated `datahub.py`
* βž• Added `viadot-azure` and `viadot-aws` dependencies
* 🧱 Added `viadot-azure.Dockerfile`
* πŸ› Added import error handling to all optional sources
* πŸ› Fixed adls import
* 🧱 Added `viadot-aws.Dockerfile`
* πŸ› Fixed import errors in `prefect-viadot`
* βœ… Added prefect-viadot tests and refactored viadot tests
* πŸ™ˆ Updated .gitignore file
* βž• Added new dev dependencies
* 🧱 Removed unneeded packages from `viadot-azure.Dockerfile`
* βž• Added dependencies to `pyproject.toml`
* ⬆️ Upgraded `viadot-azure` packages
* πŸ› Fixed imports in viadot integration tests
* 🧱 Refactored `viadot-azure.Dockerfile`
* ⬆️ Upgraded aws dependencies in `pyproject.toml`
* ⬆️ Upgraded dependencies
* 🧱 Added viadot-lite image
* ♻️ Refactored viadot-aws image
* 🧱 Updated `docker-compose.yml`
* βž• Added docs dependencies
* 🎨 Fixed rye formatting
* βž– Removed duplicated dependencies
* πŸ› Fixed mkdocs config bug
* 🧱 Moved images into one multi-stage `Dockerfile` (#932)
* 🧱 Created multi-stage build of docker images
* πŸ”₯ Removed old Dockerfiles
* πŸ‘· Updated `docker-publish.yml`
* 🧱 Removed no-longer-needed `.lock` files
* 🧱 Added `rye` into docker container
* 🧱 Left rye inside Docker image
* πŸ”– Bumped version to `2.0.0-alpha.3`
* ⬇️ Downgraded `requests` package
* πŸ”– Bumped to `2.0.0-alpha.4` version
* 🧱 Upgraded images in `docker-compose.yml`
* Add documentation for viadot 2.0 with new repository structure (#929)
* πŸ“ Created new directory structure for references tab
* πŸ“ Added `Getting Started` section in docs
* πŸ“ Added `User Guide` section
* πŸ“ Refactored docs structure
* πŸ“ Added new user guide
* ✨ Added script to synchronize `.lock` files
* πŸ“ Added `Managing dependencies` section in docs
* πŸ“ Fixed typos in docs
* πŸ“ Improved tutorial about adding source and flows
* πŸ“ Removed `Managing dependencies` section
* πŸ“ Added flow and task references
* πŸ“ Updated link in documentation
* πŸ“ Updated docs in `user_guide/config_key.md` Co-authored-by: MichaΕ‚ Zawadzki
* πŸ“ Updated docs in `user_guide/adding_source.md` Co-authored-by: MichaΕ‚ Zawadzki
* πŸ“ Updated docs in `user_guide/adding_prefect_flow.md` Co-authored-by: MichaΕ‚ Zawadzki
* πŸ“ Added description about `to_df()` in `adding_source.md`
* πŸ“ Improved docs in `adding_prefect_flow.md`
* πŸ“ Removed badges from `index.md`
* πŸ“ Added `Advanced Usage` section
* πŸ“ Moved docs about Rye into `CONTRIBUTING.md`
* πŸ“ Moved docker tutorial section from docs to `CONTRIBUTING.md`
* πŸ“ Updated `CONTRIBUTING.md` Co-authored-by: MichaΕ‚ Zawadzki
* πŸ“ Removed Rye description from `CONTRIBUTING.md`

---------
Co-authored-by: MichaΕ‚ Zawadzki

* ✨ Added new param to `sharepoint_to_redshift_spectrum`
* ✨ Added new param to `sharepoint.py`
* ✨ Added `basename_template` to MinIO source
* ✨ Added `SQLServer` source and tasks for it
* ✨ Added handling for `DatabaseCredentials` and `Secret` in get_credentials
* ✨ Added `df_to_minio` task for prefect
* Added `sql_server_to_minio` flow for prefect
* βœ… Added tests for sql_server_to_minio
* πŸ“ Updated changelog with `sql_server_to_minio` and related functions
* πŸ› Added missing package to Dockerfile
* ⬆️ Upgraded `prefect` version to `2.19.7`
* πŸ”– Bumped viadot version to `2.0.0-alpha.5`
* βœ… Added tests
* 🎨 Updated credentials options
* πŸ”§ Updated docker setup
* 🎨 Updated data type
* 🎨 Added contextlib for MinIO
* πŸ“ Updated `requirements.lock`s
* πŸ“ Updated SQL Server docs
* 🎨 Added whitespaces
* ⬇️ Downgraded dependencies
* πŸ”– Bumped viadot to version `2.0.0-alpha.6`
* πŸ“ updated CHANGELOG.md
* ✨ updated Outlook connector version 1.
* ✨ updated Outlook connector version 2.
* πŸ“ updated docstrings.
* βœ… added outlook test file.
* πŸ‘” updated some files to align with the rebase.
* πŸ“ updated CHANGELOG.md
* ✨ added Hubspot connector version 1.
* βœ… added hubspot test file.
* πŸ“ updated docstrings.
* βœ… updated local lock file.
* πŸ”Š updated logger in source.
* πŸ‘” updated some files to align with the rebase.
* πŸ‘” updated some more files to align with the rebase.
* πŸ“ updated CHANGELOG.
* ✨ added Mindful to __init__ files.
* ✨ created new Mindful connector.
* 🎨 updated mindful flow and task connector.
* βœ… added mindful test file.
* πŸ“ updated mindful docstrings.
* ⚑️ added sep parameter in adls task.
* πŸ”Š updated logs.
* πŸ“ updated docstrings.
* πŸ”Š updated logger in source.
* πŸ‘” updated some files to align with the rebase.
* πŸ“ updated CHANGELOG.md and __init__ files.
* ✨ added Genesys file structure version 1.
* πŸ“ updated rebased files.
* ✨ added Genesys file structure version 2.
* ✨ added Genesys file structure version 3.
* πŸ“ added some extra log information.
* ✨ added Genesys file structure version 4.
* βœ… added genesys test files.
* βœ… updated genesys test file.
* πŸ”Š updated logger in source.
* πŸ‘” updated some files to align with the rebase.
* πŸ“ updated docstring.
* 🎨 implemented flake8 and pylint tests.
* πŸ’„ added prints to source level.
* πŸ“ updated variable names.
* Duckdb connectors (#945)
* 🚚 Changed tasks utils location
* ✨ Created DuckDB connectors
* ✨ Created BCP task
* 🎨 Formatted code with black
* βœ… Added tests
* πŸ“ Updated changelog with duckdb connectors
* πŸ”₯ Removed irrelevant docstring
* πŸ”₯ Removed irrelevant code
* 🎨 Cleaned up the code
* 🎨 Cleaned up the code
* πŸ“ Updated docstring
* βœ… Updated DuckDB test
* πŸ”₯ removed else statement
* βͺ Reverted change from previous commit

---------
Co-authored-by: angelika233

* Delete .python_history
* βœ… updated test file.
* 🎨 updated code performance.
* βœ… updated test file.
* c4c code checker passed and tests coverage passed
* 🎨 updated code performance.
* βœ… updated test file.
* 🎨 updated code performance.
* βœ… updated test file.
* flows_tasks_for c4c
* βœ… updated test file to reach 80% coverage.
* ✏️ corrected a typo.
* βœ… updated test file to reach 80% coverage.
* βœ… updated test file.
* ✏️ fixed a typo.
* ✏️ fixed another typo.
* ✨ Added sap_to_parquet flow (#947)
* ✨ Added sap_to_parquet flow and tests
* ⚑️ Changed parameter names
* 🎨 Changed credentials
* 🎨 Changed creds
* πŸ“ Updated changelog
* 🎨 Formatted code with black
* πŸ“ Improved docstring
* πŸ“ Updated docstring
* βœ… Updated test
* ✏️ Fixed typo in sql server source
* πŸ“ Added info about typo to changelog
* βœ… updated test file to reach 80% coverage.
* βœ… updated test file.
* βœ… updated test file.
* βœ… updated test file to reach 80% coverage.
* 🦺 added `return` in flow file.
* 🦺 added `return` in flow file.
* 🦺 added `return` in flow file.
* 🦺 added `return` in flow file.
* βœ… added test integration file.
* βœ… added test integration file.
* βœ… added test integration file.
* πŸ“ fixed credential typo.
* βœ… added test integration file.
* βž• Added `duckdb` to dependencies
* βž• Added `prefect-aws` dependency
* πŸš€ Release 2.0.0-beta.1
* cloud for customer improvement
* recover gitignore
* removing unneeded files
* docker initial
* rollback gitignore
* update ignore
* rollback gitignore
* remove unneeded file
* Sharepoint orchestration code refactor (#950)
* ✨ Moved sharepoint tasks from prefect_viadot repo
* ✨ Moved sharepoint_to_redshift_spectrum flow from prefect_viadot repo
* πŸ”₯ Cleaned up init for prefect tasks
* Added `viadot.orchestration.prefect`
* Sharepoint - multiple files logic applied to the source class (#942)
* ⬆️ Relax sql-metadata version requirement (#940)
* ⬆️ Relax sql-metadata version requirement
* πŸ“Œ Update lockfiles
* ✨ Added `validate_and_reorder_dfs_columns` to utils
* ♻️ Added new version of Sharepoint source class with additional functions
* βœ… added tests for `validate_and_reorder_dfs_columns` function
* βœ… Created `sharepoint_mock` function and changed function name to `_download_file_stream`
* πŸ“ Updated docstring for Sharepoint source class and functions
* 🚧 Modified `validate_and_reorder_dfs_columns`
* πŸ› Added `na_values` to `_load_and_parse` function
* πŸ› Added tests for Sharepoint functions
* πŸ› Added **kwargs to handle_multiple_files function
* 🚧 Added `dtypes=str` instead of functions
* βœ… Removed tests for nonexistent functions
* βœ… Added missing tests
* βœ… Added missing tests to sharepoint class methods

---------
Co-authored-by: MichaΕ‚ Zawadzki
Co-authored-by: Marcin Purtak <44641138+marcinpurtak@users.noreply.github.com>

* ✨ Added 0365 (#969)
* ✨ Added 0365
* 🚧 Moved `0365` to dependencies
* Orchestration last changes (#953)
* 🚚 Move manual actions to a subfolder * πŸ› Fix the incorrect test dir structure & duplication * πŸ‘· Add CD workflow * πŸ“Œ Update MSSQL driver version & pin mssql-tools * πŸ“ Update container instructions * πŸ‘· Add linter rules * πŸ› Fix typo * ♻️ Do not install Databricks by default * ♻️ Use standard file name * ♻️ Readd commented out test * ♻️ Refactor some weird stuff * ♻️ Fix typo * ♻️ Remove duplicated docstrings * ♻️ Remove clutter * 🎨 Linting * 🚨 Docs & linting * πŸ“ Improve the user guide * πŸ“ Further docs improvements * πŸ“ Further docs improvements * πŸ“ Docs some more * πŸ“ Docs - final touches * πŸ“ Improve example * 🚚 Move to correct path * πŸ”₯ Remove duplicate tests * ♻️ Update tests * πŸ“ Minor improvement * 🚨 Lint utils * πŸ”’οΈ Remove the insecure `credentials` param * 🚨 More linting * πŸ“ Add SAP RFC installation instructions * 🚨 More linting * ⬆️ Bump pyarrow Fixes #970 * πŸ“Œ Update lockfiles * βœ… Fix all unit tests * πŸ”₯ Remove dead code * 🚨 Lint tests * βœ… Skip broken tests * 🚨 Lint all remaining tests * 🚨 Fix remaining linter warnings * πŸ“Œ Update lock files * ♻️ Minor fixes * πŸ§‘β€πŸ’» Also publish `latest` tags for all images For use eg. in the docker-compose file. * πŸ› Fix typo * ✨ Add GitHub release step * πŸ“ Document the new release process * πŸ“Œ Bump version * ♻️ Add last changes from other branches * ♻️ Update some sources' test configuration to match rest of lib * πŸ“ Add more docs on contributing * πŸ“ Update a link * πŸ› Update lock files, removing optional deps * ⬆️ Update dependencies * 🚨 Linting * πŸ› Add TOML support to coverage * βœ… Fix `_cast_df()` test failing on datetimes in pandas 2.0 * ⬆️ Run CI on Python 3.12 * βž– Remove unused `pytest-cov` * ⬆️ Upgrade Python version so Rye CI action uses 3.12 * ⬆️ Upgrade Python to 3.12 in the images * πŸ“ Improve container env docs * ⬇️ Rollback `pyarrow` to v10.x Also roll back Python to 3.10 as this `pyarrow` version is not compatible with Python 3.12. 
* ♻️ Use a `skip_test_on_missing_extra()` utils to simplify life * πŸ§‘β€πŸ’» Install dev dependencies in local containers * πŸ› Fix for broken `numpy` version * 🚧 RedshiftSpectrum source unit tests - WIP --------- Co-authored-by: Diego-H-S Co-authored-by: MichaΕ‚ Zawadzki Co-authored-by: angelika233 Co-authored-by: Angelika Tarnawa <59344718+angelika233@users.noreply.github.com> Co-authored-by: fdelgadodyvenia Co-authored-by: Natalia Walczak <74621908+judynah@users.noreply.github.com> Co-authored-by: Diego <108733861+Diego-H-S@users.noreply.github.com> Co-authored-by: Fabio Delgado <126676214+fdelgadodyvenia@users.noreply.github.com> Co-authored-by: RafaΕ‚ Ziemianek <49795849+Rafalz13@users.noreply.github.com> Co-authored-by: Marcin Purtak <44641138+marcinpurtak@users.noreply.github.com> --- .dockerignore | 11 +- .github/workflows/ad_hoc/docker-publish.yml | 67 + .../{ => ad_hoc}/publish_to_pypi.yml | 6 +- .github/workflows/build-2.0.yml | 37 - .github/workflows/build.yml | 124 - .github/workflows/cd.yml | 127 + .github/workflows/{ci-2.0.yml => ci.yml} | 4 +- .github/workflows/deploy_docs.yml | 17 - .github/workflows/docker-publish.yml | 46 - .github/workflows/release.yml | 128 - .gitignore | 19 +- .vscode/install_extensions.sh | 10 +- CHANGELOG.md | 19 + CONTRIBUTING.md | 137 +- README.md | 41 +- config.yaml.example | 49 +- docker/Dockerfile | 125 +- docker/docker-compose.yml | 28 +- docker/entrypoint.sh | 11 + docker/odbcinst.ini | 2 +- docs/advanced_usage/containerized_env.md | 45 + docs/advanced_usage/index.md | 7 + .../developer_guide/contributing_to_viadot.md | 9 + .../creating_a_prefect_flow.md | 165 + docs/developer_guide/creating_a_source.md | 321 + docs/developer_guide/index.md | 5 + docs/getting_started.md | 102 + docs/howtos/config_file.md | 42 - .../howto_migrate_sources_tasks_and_flows.md | 52 - docs/index.md | 89 +- docs/references/api_sources.md | 5 - .../references/orchestration/prefect/flows.md | 19 + .../references/orchestration/prefect/tasks.md | 23 + docs/references/sources/api_sources.md | 17 + docs/references/sources/sql_sources.md | 19 + docs/references/sql_sources.md | 8 - docs/tutorials/adding_source.md | 13 - docs/tutorials/databricks.md | 5 - docs/tutorials/supermetrics.md | 13 - mkdocs.yml | 27 +- pyproject.toml | 199 +- requirements-dev.lock | 697 +- requirements.lock | 716 +- src/viadot/__init__.py | 3 + src/viadot/config.py | 83 +- src/viadot/examples/__init__.py | 1 + src/viadot/examples/sap_rfc/README.md | 11 +- src/viadot/exceptions.py | 53 +- src/viadot/orchestration/__init__.py | 1 + src/viadot/orchestration/prefect/__init__.py | 1 + .../orchestration/prefect/exceptions.py | 26 + .../orchestration/prefect/flows/__init__.py | 46 + .../flows/cloud_for_customers_to_adls.py | 73 + .../cloud_for_customers_to_databricks.py | 76 + .../prefect/flows/duckdb_to_parquet.py | 57 + .../prefect/flows/duckdb_to_sql_server.py | 121 + .../prefect/flows/duckdb_transform.py | 42 + .../prefect/flows/exchange_rates_to_adls.py | 77 + .../flows/exchange_rates_to_databricks.py | 83 + .../prefect/flows/genesys_to_adls.py | 113 + .../prefect/flows/hubspot_to_adls.py | 81 + .../prefect/flows/mindful_to_adls.py | 99 + .../prefect/flows/outlook_to_adls.py | 97 + .../prefect/flows/sap_to_parquet.py | 77 + .../prefect/flows/sap_to_redshift_spectrum.py | 114 + .../prefect/flows/sharepoint_to_adls.py | 61 + .../prefect/flows/sharepoint_to_databricks.py | 74 + .../flows/sharepoint_to_redshift_spectrum.py | 116 + .../prefect/flows/sharepoint_to_s3.py | 47 + 
.../prefect/flows/sql_server_to_minio.py | 61 + .../orchestration/prefect/flows/transform.py | 140 + .../prefect/flows/transform_and_catalog.py | 183 + .../orchestration/prefect/tasks/__init__.py | 51 + .../orchestration/prefect/tasks/adls.py | 92 + src/viadot/orchestration/prefect/tasks/bcp.py | 71 + .../prefect/tasks/cloud_for_customers.py | 53 + .../orchestration/prefect/tasks/databricks.py | 77 + src/viadot/orchestration/prefect/tasks/dbt.py | 71 + .../orchestration/prefect/tasks/duckdb.py | 52 + .../prefect/tasks/exchange_rates.py | 86 + .../orchestration/prefect/tasks/genesys.py | 108 + src/viadot/orchestration/prefect/tasks/git.py | 68 + .../orchestration/prefect/tasks/hubspot.py | 70 + .../orchestration/prefect/tasks/luma.py | 90 + .../orchestration/prefect/tasks/mindful.py | 79 + .../orchestration/prefect/tasks/minio.py | 58 + .../orchestration/prefect/tasks/outlook.py | 83 + .../prefect/tasks/redshift_spectrum.py | 75 + src/viadot/orchestration/prefect/tasks/s3.py | 57 + .../orchestration/prefect/tasks/sap_rfc.py | 123 + .../orchestration/prefect/tasks/sharepoint.py | 113 + .../orchestration/prefect/tasks/sql_server.py | 140 + .../orchestration/prefect/tasks/task_utils.py | 244 + src/viadot/orchestration/prefect/utils.py | 228 + src/viadot/signals.py | 5 +- src/viadot/sources/__init__.py | 62 +- src/viadot/sources/azure_data_lake.py | 198 +- src/viadot/sources/base.py | 180 +- src/viadot/sources/cloud_for_customers.py | 197 +- src/viadot/sources/databricks.py | 226 +- src/viadot/sources/duckdb.py | 258 + src/viadot/sources/exchange_rates.py | 136 +- src/viadot/sources/genesys.py | 1098 +- src/viadot/sources/hubspot.py | 376 + src/viadot/sources/mindful.py | 216 + src/viadot/sources/minio.py | 99 +- src/viadot/sources/outlook.py | 355 + src/viadot/sources/redshift_spectrum.py | 175 +- src/viadot/sources/s3.py | 155 +- src/viadot/sources/sap_rfc.py | 622 +- src/viadot/sources/sharepoint.py | 403 +- src/viadot/sources/sql_server.py | 117 + src/viadot/sources/sqlite.py | 27 +- src/viadot/sources/trino.py | 193 +- src/viadot/sources/uk_carbon_intensity.py | 34 +- src/viadot/task_utils.py | 456 - src/viadot/utils.py | 394 +- tests/.env.example | 35 + tests/.env.template | 12 - tests/__init__.py | 0 tests/conftest.py | 122 +- tests/integration/__init__.py | 1 + .../prefect/flows/test_cloud_for_customers.py | 53 + .../prefect/flows/test_duckdb_to_parquet.py | 30 + .../flows/test_duckdb_to_sql_server.py | 50 + .../prefect/flows/test_duckdb_transform.py | 36 + .../prefect/flows/test_exchange_rates.py | 47 + .../prefect/flows/test_genesys_to_adls.py | 19 + .../prefect/flows/test_sap_to_parquet.py | 29 + .../flows/test_sap_to_redshift_spectrum.py | 30 + .../prefect/flows/test_sharepoint_to_adls.py | 25 + .../test_sharepoint_to_redshift_spectrum.py | 29 + .../prefect/flows/test_sharepoint_to_s3.py | 32 + .../prefect/flows/test_sql_server_to_minio.py | 29 + .../flows/test_transform_and_catalog.py | 31 + .../orchestration/prefect/tasks/test_adls.py | 39 + .../orchestration/prefect/tasks/test_bcp.py | 22 + .../prefect/tasks/test_cloud_for_customer.py | 15 + .../prefect/tasks/test_databricks.py | 54 + .../orchestration/prefect/tasks/test_dbt.py | 11 + .../prefect/tasks/test_duckdb.py | 35 + .../tasks/test_exchange_rates_tasks.py | 61 + .../orchestration/prefect/tasks/test_git.py | 17 + .../orchestration/prefect/tasks/test_luma.py | 49 + .../orchestration/prefect/tasks/test_minio.py | 20 + .../prefect/tasks/test_redshift_spectrum.py | 54 + .../orchestration/prefect/tasks/test_s3.py | 59 + 
.../prefect/tasks/test_sap_rfc.py | 18 + .../prefect/tasks/test_sharepoint_tasks.py | 37 + .../prefect/tasks/test_sql_server.py | 59 + tests/integration/test_azure_data_lake.py | 8 +- tests/integration/test_cloud_for_customers.py | 28 +- tests/integration/test_databricks.py | 68 +- tests/integration/test_exchange_rates.py | 4 +- tests/integration/test_genesys.py | 218 - tests/integration/test_hubspot.py | 13 + tests/integration/test_mindful.py | 20 + tests/integration/test_minio.py | 16 +- tests/integration/test_outlook.py | 16 + tests/integration/test_redshift_spectrum.py | 23 +- tests/integration/test_s3.py | 28 +- tests/integration/test_sharepoint.py | 3 +- tests/integration/test_sql_server.py | 35 + tests/integration/test_trino.py | 38 +- tests/pytest.ini | 7 + tests/resources/metadata/.luma/config.yaml | 11 + tests/resources/metadata/.luma/owners.yaml | 9 + tests/resources/metadata/model/catalog.json | 398 + tests/resources/metadata/model/manifest.json | 21680 ++++++++++++++++ .../metadata/model_run/run_results.json | 79 + tests/testfile.sqlite | Bin 8192 -> 0 bytes tests/unit/__init__.py | 1 + tests/unit/orchestration/prefect/test_git.py | 21 + tests/unit/test_cloud_for_customers.py | 78 + tests/unit/test_config.py | 12 +- tests/unit/test_duckdb.py | 127 + tests/unit/test_file.xlsx | Bin 4812 -> 11067 bytes tests/unit/test_genesys.py | 557 + tests/unit/test_hubspot.py | 214 + tests/unit/test_mindful.py | 146 + tests/unit/test_outlook.py | 184 + tests/unit/test_redshift_spectrum.py | 62 + tests/{integration => unit}/test_sap_rfc.py | 99 +- tests/unit/test_sap_rfc_2.py | 106 + tests/unit/test_sharepoint.py | 184 +- tests/unit/test_utils.py | 106 +- 186 files changed, 35113 insertions(+), 4340 deletions(-) create mode 100644 .github/workflows/ad_hoc/docker-publish.yml rename .github/workflows/{ => ad_hoc}/publish_to_pypi.yml (93%) delete mode 100644 .github/workflows/build-2.0.yml delete mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/cd.yml rename .github/workflows/{ci-2.0.yml => ci.yml} (93%) delete mode 100644 .github/workflows/deploy_docs.yml delete mode 100644 .github/workflows/docker-publish.yml delete mode 100644 .github/workflows/release.yml create mode 100644 docker/entrypoint.sh create mode 100644 docs/advanced_usage/containerized_env.md create mode 100644 docs/advanced_usage/index.md create mode 100644 docs/developer_guide/contributing_to_viadot.md create mode 100644 docs/developer_guide/creating_a_prefect_flow.md create mode 100644 docs/developer_guide/creating_a_source.md create mode 100644 docs/developer_guide/index.md create mode 100644 docs/getting_started.md delete mode 100644 docs/howtos/config_file.md delete mode 100644 docs/howtos/howto_migrate_sources_tasks_and_flows.md delete mode 100644 docs/references/api_sources.md create mode 100644 docs/references/orchestration/prefect/flows.md create mode 100644 docs/references/orchestration/prefect/tasks.md create mode 100644 docs/references/sources/api_sources.md create mode 100644 docs/references/sources/sql_sources.md delete mode 100644 docs/references/sql_sources.md delete mode 100644 docs/tutorials/adding_source.md delete mode 100644 docs/tutorials/databricks.md delete mode 100644 docs/tutorials/supermetrics.md create mode 100644 src/viadot/orchestration/__init__.py create mode 100644 src/viadot/orchestration/prefect/__init__.py create mode 100644 src/viadot/orchestration/prefect/exceptions.py create mode 100644 src/viadot/orchestration/prefect/flows/__init__.py create mode 100644 
src/viadot/orchestration/prefect/flows/cloud_for_customers_to_adls.py create mode 100644 src/viadot/orchestration/prefect/flows/cloud_for_customers_to_databricks.py create mode 100644 src/viadot/orchestration/prefect/flows/duckdb_to_parquet.py create mode 100644 src/viadot/orchestration/prefect/flows/duckdb_to_sql_server.py create mode 100644 src/viadot/orchestration/prefect/flows/duckdb_transform.py create mode 100644 src/viadot/orchestration/prefect/flows/exchange_rates_to_adls.py create mode 100644 src/viadot/orchestration/prefect/flows/exchange_rates_to_databricks.py create mode 100644 src/viadot/orchestration/prefect/flows/genesys_to_adls.py create mode 100644 src/viadot/orchestration/prefect/flows/hubspot_to_adls.py create mode 100644 src/viadot/orchestration/prefect/flows/mindful_to_adls.py create mode 100644 src/viadot/orchestration/prefect/flows/outlook_to_adls.py create mode 100644 src/viadot/orchestration/prefect/flows/sap_to_parquet.py create mode 100644 src/viadot/orchestration/prefect/flows/sap_to_redshift_spectrum.py create mode 100644 src/viadot/orchestration/prefect/flows/sharepoint_to_adls.py create mode 100644 src/viadot/orchestration/prefect/flows/sharepoint_to_databricks.py create mode 100644 src/viadot/orchestration/prefect/flows/sharepoint_to_redshift_spectrum.py create mode 100644 src/viadot/orchestration/prefect/flows/sharepoint_to_s3.py create mode 100644 src/viadot/orchestration/prefect/flows/sql_server_to_minio.py create mode 100644 src/viadot/orchestration/prefect/flows/transform.py create mode 100644 src/viadot/orchestration/prefect/flows/transform_and_catalog.py create mode 100644 src/viadot/orchestration/prefect/tasks/__init__.py create mode 100644 src/viadot/orchestration/prefect/tasks/adls.py create mode 100644 src/viadot/orchestration/prefect/tasks/bcp.py create mode 100644 src/viadot/orchestration/prefect/tasks/cloud_for_customers.py create mode 100644 src/viadot/orchestration/prefect/tasks/databricks.py create mode 100644 src/viadot/orchestration/prefect/tasks/dbt.py create mode 100644 src/viadot/orchestration/prefect/tasks/duckdb.py create mode 100644 src/viadot/orchestration/prefect/tasks/exchange_rates.py create mode 100644 src/viadot/orchestration/prefect/tasks/genesys.py create mode 100644 src/viadot/orchestration/prefect/tasks/git.py create mode 100644 src/viadot/orchestration/prefect/tasks/hubspot.py create mode 100644 src/viadot/orchestration/prefect/tasks/luma.py create mode 100644 src/viadot/orchestration/prefect/tasks/mindful.py create mode 100644 src/viadot/orchestration/prefect/tasks/minio.py create mode 100644 src/viadot/orchestration/prefect/tasks/outlook.py create mode 100644 src/viadot/orchestration/prefect/tasks/redshift_spectrum.py create mode 100644 src/viadot/orchestration/prefect/tasks/s3.py create mode 100644 src/viadot/orchestration/prefect/tasks/sap_rfc.py create mode 100644 src/viadot/orchestration/prefect/tasks/sharepoint.py create mode 100644 src/viadot/orchestration/prefect/tasks/sql_server.py create mode 100644 src/viadot/orchestration/prefect/tasks/task_utils.py create mode 100644 src/viadot/orchestration/prefect/utils.py create mode 100644 src/viadot/sources/duckdb.py create mode 100644 src/viadot/sources/hubspot.py create mode 100644 src/viadot/sources/mindful.py create mode 100644 src/viadot/sources/outlook.py create mode 100644 src/viadot/sources/sql_server.py delete mode 100644 src/viadot/task_utils.py create mode 100644 tests/.env.example delete mode 100644 tests/.env.template delete mode 100644 tests/__init__.py 
create mode 100644 tests/integration/orchestration/prefect/flows/test_cloud_for_customers.py create mode 100644 tests/integration/orchestration/prefect/flows/test_duckdb_to_parquet.py create mode 100644 tests/integration/orchestration/prefect/flows/test_duckdb_to_sql_server.py create mode 100644 tests/integration/orchestration/prefect/flows/test_duckdb_transform.py create mode 100644 tests/integration/orchestration/prefect/flows/test_exchange_rates.py create mode 100644 tests/integration/orchestration/prefect/flows/test_genesys_to_adls.py create mode 100644 tests/integration/orchestration/prefect/flows/test_sap_to_parquet.py create mode 100644 tests/integration/orchestration/prefect/flows/test_sap_to_redshift_spectrum.py create mode 100644 tests/integration/orchestration/prefect/flows/test_sharepoint_to_adls.py create mode 100644 tests/integration/orchestration/prefect/flows/test_sharepoint_to_redshift_spectrum.py create mode 100644 tests/integration/orchestration/prefect/flows/test_sharepoint_to_s3.py create mode 100644 tests/integration/orchestration/prefect/flows/test_sql_server_to_minio.py create mode 100644 tests/integration/orchestration/prefect/flows/test_transform_and_catalog.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_adls.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_bcp.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_cloud_for_customer.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_databricks.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_dbt.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_duckdb.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_exchange_rates_tasks.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_git.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_luma.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_minio.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_redshift_spectrum.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_s3.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_sap_rfc.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_sharepoint_tasks.py create mode 100644 tests/integration/orchestration/prefect/tasks/test_sql_server.py delete mode 100644 tests/integration/test_genesys.py create mode 100644 tests/integration/test_hubspot.py create mode 100644 tests/integration/test_mindful.py create mode 100644 tests/integration/test_outlook.py create mode 100644 tests/integration/test_sql_server.py create mode 100644 tests/resources/metadata/.luma/config.yaml create mode 100644 tests/resources/metadata/.luma/owners.yaml create mode 100644 tests/resources/metadata/model/catalog.json create mode 100644 tests/resources/metadata/model/manifest.json create mode 100644 tests/resources/metadata/model_run/run_results.json delete mode 100644 tests/testfile.sqlite create mode 100644 tests/unit/orchestration/prefect/test_git.py create mode 100644 tests/unit/test_cloud_for_customers.py create mode 100644 tests/unit/test_duckdb.py create mode 100644 tests/unit/test_genesys.py create mode 100644 tests/unit/test_hubspot.py create mode 100644 tests/unit/test_mindful.py create mode 100644 tests/unit/test_outlook.py create mode 100644 tests/unit/test_redshift_spectrum.py rename tests/{integration => unit}/test_sap_rfc.py (53%) create mode 
100644 tests/unit/test_sap_rfc_2.py diff --git a/.dockerignore b/.dockerignore index 8b2a185aa..c91a3175e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,8 +5,11 @@ __pycache__ .git .jupyter .local -.vscode -.pytest_cache -.venv -.config +.vscode/ +.pytest_cache/ +.venv/ +.config/ tests/ +.ruff_cache/ +.mypy_cache/ +build/ diff --git a/.github/workflows/ad_hoc/docker-publish.yml b/.github/workflows/ad_hoc/docker-publish.yml new file mode 100644 index 000000000..f8270920b --- /dev/null +++ b/.github/workflows/ad_hoc/docker-publish.yml @@ -0,0 +1,67 @@ +# Publish viadot images with a specified tag. +name: "Publish Docker images" +run-name: "Publish viadot-*:${{ github.event.inputs.tag }} images (@${{ github.actor }})" + +on: + workflow_dispatch: + inputs: + tag: + description: "The tag to use for the image." + required: true + default: "dev" + install_databricks: + description: "Whether to install Databricks source dependencies." + required: false + default: "false" +jobs: + docker: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + platforms: all + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log into GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and publish viadot-lite image + uses: docker/build-push-action@v3 + with: + context: . + file: docker/Dockerfile + platforms: linux/amd64 + push: true + target: viadot-lite + tags: ghcr.io/${{ github.repository }}/viadot-lite:${{ github.event.inputs.tag }} + + - name: Build and publish viadot-aws image + uses: docker/build-push-action@v3 + with: + context: . + file: docker/Dockerfile + platforms: linux/amd64 + push: true + target: viadot-aws + tags: ghcr.io/${{ github.repository }}/viadot-aws:${{ github.event.inputs.tag }} + + - name: Build and publish viadot-azure image + uses: docker/build-push-action@v3 + with: + context: . 
+ file: docker/Dockerfile + platforms: linux/amd64 + push: true + target: viadot-azure + tags: ghcr.io/${{ github.repository }}/viadot-azure:${{ github.event.inputs.tag }} + build-args: INSTALL_DATABRICKS=${{ github.event.inputs.install_databricks }} diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/ad_hoc/publish_to_pypi.yml similarity index 93% rename from .github/workflows/publish_to_pypi.yml rename to .github/workflows/ad_hoc/publish_to_pypi.yml index f97d389fc..96b335f8b 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/ad_hoc/publish_to_pypi.yml @@ -18,11 +18,7 @@ jobs: with: python-version: "3.10" - name: Install pypa/build - run: >- - python3 -m - pip install - build - --user + run: python3 -m pip install build --user - name: Build a binary wheel and a source tarball run: python3 -m build diff --git a/.github/workflows/build-2.0.yml b/.github/workflows/build-2.0.yml deleted file mode 100644 index 3e2ff7bf3..000000000 --- a/.github/workflows/build-2.0.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: build-2.0 - -on: - push: - branches: - - "2.0" - -jobs: - publish-docker-image: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - with: - platforms: all - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Log into GitHub Container Registry - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build - uses: docker/build-push-action@v3 - with: - context: . - file: docker/Dockerfile - platforms: linux/amd64 - push: true - tags: ghcr.io/${{ github.repository }}/viadot:2.0-latest diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 977d0c53d..000000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: build - -on: - push: - branches: - - "main" - - "dev" - pull_request: - branches: - - "main" - - "dev" - -env: - IMAGE_NAME: viadot - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - # 3.8+. 3.9 should be supported by late 2021. - python-version: [3.8] - - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Cache pip - id: cache-pip - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - - - name: Install dependencies - run: | - pip install -r requirements.txt - if: steps.cache.outputs.cache-hit != 'true' - continue-on-error: false - - - name: Lint with flake8 - if: always() - run: | - pip install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics --ignore=E203 - - - name: Format imports with isort - if: always() - run: | - pip install isort - isort --profile black . - - - name: Format with black - id: blackCheck - if: always() - run: | - pip install black - black --check . 
- continue-on-error: true - - - name: Commit Black changes to the pull request - if: ${{ always() && steps.blackCheck.outcome == 'failure' }} - run: | - git config --global user.name 'github-actions[bot]' - git config --global user.email 'github-actions[bot]@users.noreply.github.com' - git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY - black . - git checkout $GITHUB_HEAD_REF - git commit -am "🎨 Format Python code with Black" - git push - - - name: Test with pytest - if: always() - env: - VIADOT_CONFIG_PATH: .config/credentials.json.template - run: | - pip install pytest - sudo apt install libsqliteodbc - pytest tests/unit - - # - name: Generate coverage report - # run: | - # pytest --cov-report xml --cov=viadot tests/ - # - name: Upload coverage report to Codecov - # uses: codecov/codecov-action@v1 - # with: - # token: ${{ secrets.CODECOV_TOKEN }} - # fail_ci_if_error: true - - publish_docker: - needs: build - runs-on: ubuntu-latest - if: github.ref == 'refs/heads/dev' && github.event_name == 'push' - steps: - - name: "Checkout source code" - uses: actions/checkout@v2 - with: - ref: ${{ github.ref }} - - - name: Build image - run: export DOCKER_BUILDKIT=1 && docker build . --file docker/Dockerfile --tag $IMAGE_NAME - - - name: Log into registry - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login docker.pkg.github.com -u ${{ github.actor }} --password-stdin - - - name: Push image - run: | - IMAGE_ID=docker.pkg.github.com/${{ github.repository }}/$IMAGE_NAME - - # Change all uppercase to lowercase - IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') - - # Publish with the `dev` label - echo IMAGE_ID=$IMAGE_ID - docker tag $IMAGE_NAME $IMAGE_ID:dev - docker push $IMAGE_ID:dev diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 000000000..d5f038ba0 --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,127 @@ +name: Release + +on: + push: + tags: + - "v2.*.*" # Match tags that begin with "v2". 
+ +jobs: + build-distribution: + name: Build distribution πŸ“¦ + timeout-minutes: 5 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Install pypa/build + run: python3 -m pip install build --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: Publish to PyPI πŸš€ + timeout-minutes: 5 + needs: + - build-distribution + runs-on: ubuntu-latest + environment: + name: release + url: https://pypi.org/p/viadot2 + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + publish-docker-images: + name: Publish Docker images 🐳 + timeout-minutes: 15 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + platforms: all + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log into GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Construct the tag for Docker images + run: | + # Strip the "v" prefix for the image tag. + VERSION=${{ github.ref_name }} + echo "TAG=${VERSION#v}" >> $GITHUB_ENV + + - name: Build and publish viadot-lite image + uses: docker/build-push-action@v6 + with: + context: . + file: docker/Dockerfile + platforms: linux/amd64 + push: true + target: viadot-lite + tags: | + ghcr.io/${{ github.repository }}/viadot-lite:${TAG} + ghcr.io/${{ github.repository }}/viadot-lite:latest + + - name: Build and publish viadot-aws image + uses: docker/build-push-action@v6 + with: + context: . + file: docker/Dockerfile + platforms: linux/amd64 + push: true + target: viadot-aws + tags: | + ghcr.io/${{ github.repository }}/viadot-aws:${TAG} + ghcr.io/${{ github.repository }}/viadot-aws:latest + + - name: Build and publish viadot-azure image + uses: docker/build-push-action@v6 + with: + context: . 
+ file: docker/Dockerfile + platforms: linux/amd64 + push: true + target: viadot-azure + tags: | + ghcr.io/${{ github.repository }}/viadot-azure:${TAG} + ghcr.io/${{ github.repository }}/viadot-azure:latest + build-args: INSTALL_DATABRICKS=false + + create_github_release: + name: Create a GitHub release πŸ”– + timeout-minutes: 5 + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + - name: Create a release + uses: ncipollo/release-action@v1 + with: + generateReleaseNotes: true diff --git a/.github/workflows/ci-2.0.yml b/.github/workflows/ci.yml similarity index 93% rename from .github/workflows/ci-2.0.yml rename to .github/workflows/ci.yml index c3f8186b8..f58a777de 100644 --- a/.github/workflows/ci-2.0.yml +++ b/.github/workflows/ci.yml @@ -40,12 +40,12 @@ jobs: - uses: actions/checkout@v4 - name: Set up Rye - uses: eifinger/setup-rye@v2 + uses: eifinger/setup-rye@v4 - name: Install dependencies run: | rye config --set-bool behavior.use-uv=true - rye sync --no-lock + rye sync --no-lock --features=aws - name: Run tests run: rye run coverage run --branch -m pytest tests/unit -vv diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml deleted file mode 100644 index 2990d36b7..000000000 --- a/.github/workflows/deploy_docs.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: deploy_docs - -on: - workflow_dispatch: - -jobs: - publish_docs: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - run: pip install mkdocs-material mkdocstrings - - run: pip install -r requirements.txt - - run: pip install -e . - - run: mkdocs gh-deploy --force diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml deleted file mode 100644 index 794f57a60..000000000 --- a/.github/workflows/docker-publish.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: "Publish Docker image" -# Publish the viadot Docker image with specified tags. -run-name: "Deploy viadot:${{ github.event.inputs.tag }} image (@${{ github.actor }})" - -on: - workflow_dispatch: - inputs: - tag: - description: "The tag to use for the image." - required: true - default: "dev" - if_databricks: - description: "Whether to install Databricks source dependencies." - required: false - default: "false" -jobs: - docker: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - with: - platforms: all - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Log into GitHub Container Registry - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build - uses: docker/build-push-action@v3 - with: - context: . 
- file: docker/Dockerfile - platforms: linux/amd64 - push: true - tags: ghcr.io/${{ github.repository }}/viadot:${{ github.event.inputs.tag }} - build-args: INSTALL_DATABRICKS=${{ github.event.inputs.if_databricks }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 83785c965..000000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,128 +0,0 @@ -# This workflow will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - -name: release -# Publish a release to Docker registry and PyPI - -# Controls when the action will run. -on: - # Triggers the workflow when a release is created - release: - types: [created] - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -env: - IMAGE_NAME: viadot - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - # Validate the docker image. - # See also https://docs.docker.com/docker-hub/builds/automated-testing/ - test_docker: - runs-on: ubuntu-latest - steps: - - name: "Checkout source code" - uses: actions/checkout@v2 - with: - ref: ${{ github.ref }} - - name: Test docker image - run: | - if [ -f docker-compose.test.yml ]; then - docker-compose --file docker/docker-compose.test.yml build - docker-compose --file docker/docker-compose.test.yml run sut - else - docker build . --file docker/Dockerfile - fi - # Push the image to GitHub Packages. - # See also https://docs.docker.com/docker-hub/builds/ - publish_docker: - # Ensure test job passes before pushing the image. - needs: test_docker - runs-on: ubuntu-latest - steps: - - name: "Checkout source code" - uses: actions/checkout@v2 - with: - ref: ${{ github.ref }} - - - name: Build - run: docker build . --file docker/Dockerfile --tag $IMAGE_NAME - - - name: Log into registry - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login docker.pkg.github.com -u ${{ github.actor }} --password-stdin - - - name: Push - run: | - IMAGE_ID=docker.pkg.github.com/${{ github.repository }}/$IMAGE_NAME - # Change all uppercase to lowercase - IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') - # Strip git ref prefix from version - VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') - # Strip "v" prefix from tag name - [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') - - # Push with release tag - echo IMAGE_ID=$IMAGE_ID - echo VERSION=$VERSION - docker tag $IMAGE_NAME $IMAGE_ID:$VERSION - docker push $IMAGE_ID:$VERSION - - # Push also with "latest" tag - docker tag $IMAGE_NAME $IMAGE_ID:latest - docker push $IMAGE_ID:latest - - publish_pypi: - runs-on: ubuntu-latest - - steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - name: "Checkout source code" - uses: actions/checkout@v2 - with: - ref: ${{ github.ref }} - - # Sets up python - - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - # BROKEN - # # Automatically bump package version - # - name: Autobump version - # run: | - # # from refs/tags/v1.2.3 get 1.2.3 - # VERSION=$(echo $GITHUB_REF | sed 's#.*/v##') - # PLACEHOLDER='__version__ = "develop"' - # VERSION_FILE='viadot/__init__.py' - # # ensure the placeholder is there. If grep doesn't find the placeholder - # # it exits with exit code 1 and github actions aborts the build. 
- # grep "$PLACEHOLDER" "$VERSION_FILE" - # sed -i "s/$PLACEHOLDER/__version__ = \"${VERSION}\"/g" "$VERSION_FILE" - # shell: bash - - # Install dependencies - - name: "Installs dependencies" - run: | - python -m pip install --upgrade pip - python -m pip install setuptools wheel twine - - # Build and upload to PyPI - - name: "Builds and uploads to PyPI" - run: | - python setup.py sdist bdist_wheel - python -m twine upload dist/* - env: - TWINE_USERNAME: acivitillo - TWINE_PASSWORD: ${{ secrets.TWINE_TOKEN }} - publish_docs: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - run: pip install mkdocs-material mkdocstrings - - run: pip install -r requirements.txt - - run: pip install -e . - - run: mkdocs gh-deploy --force diff --git a/.gitignore b/.gitignore index 11d902f10..f2bed40d8 100644 --- a/.gitignore +++ b/.gitignore @@ -82,6 +82,9 @@ target/ profile_default/ ipython_config.py +# pyenv +.python-version + # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies @@ -126,13 +129,14 @@ dmypy.json # Pyre type checker .pyre/ -# Linux +# OS files +.DS_Store .bash_history .bashrc .viminfo .netrwhist .ssh - +.python_history # Azure .azure @@ -162,6 +166,17 @@ profiles.yaml # DataHub .datahub +# local/env +*.prefect/ +*.config/ +*.local/ + +# VS Code +.vscode-server/ + +# Jupyter notebook +*.ipynb + # Git .gitconfig diff --git a/.vscode/install_extensions.sh b/.vscode/install_extensions.sh index 9c03f7ff5..90df08ae2 100644 --- a/.vscode/install_extensions.sh +++ b/.vscode/install_extensions.sh @@ -1 +1,9 @@ -grep -v '^#' extensions.list | xargs -L 1 code --install-extension +#!/usr/bin/env bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +EXTENSIONS_RELATIVE_PATH="./extensions.list" + +EXTENSIONS_PATH=$(realpath $SCRIPT_DIR/$EXTENSIONS_RELATIVE_PATH) + +export DONT_PROMPT_WSL_INSTALL=true +grep -v '^#' $EXTENSIONS_PATH | xargs -L 1 code --install-extension diff --git a/CHANGELOG.md b/CHANGELOG.md index 74cc77ba4..e70f5016c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Added new version of `Genesys` connector and test files. +- Added new version of `Outlook` connector and test files. +- Added new version of `Hubspot` connector and test files. +- Added `Mindful` connector and test file. + ### Added +- Added `sap_to_parquet` Prefect flow. +- Added `duckdb_to_sql_server`, `duckdb_to_parquet`, `duckdb_transform` Prefect flows. +- Added `bcp` and `duckdb_query` Prefect tasks. +- Added `DuckDB` source class. +- Added `sql_server_to_minio` flow for prefect. +- Added `df_to_minio` task for prefect +- Added handling for `DatabaseCredentials` and `Secret` blocks in `prefect/utlis.py:get_credentials` +- Added `SQLServer` source and tasks `create_sql_server_table`, `sql_server_to_df`,`sql_server_query` +- Added `basename_template` to `MinIO` source - Added `_empty_column_to_string` and `_convert_all_to_string_type` to convert data types to string. - Added `na_values` parameter to `Sharepoint` class to parse `N/A` values coming from the excel file columns. - Added `get_last_segment_from_url` function to sharepoint file. 
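For orientation, the flows and tasks listed in the changelog entries above live under the new `viadot.orchestration.prefect` package. A minimal usage sketch of the new `sql_server_to_minio` flow might look like the following; the flow name comes from the changelog entry, while the parameter names and config keys are illustrative assumptions rather than the actual signature:

```python
# Illustrative only: the flow name comes from the changelog entry above;
# the parameter names and config keys are assumptions, not the real signature.
from viadot.orchestration.prefect.flows import sql_server_to_minio

sql_server_to_minio(
    query="SELECT * FROM sales.orders",  # hypothetical source query
    path="s3://datalake/sales/orders.parquet",  # hypothetical MinIO target path
    sql_server_config_key="sql_server",  # assumed viadot config entries
    minio_config_key="minio",
)
```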
@@ -35,6 +49,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Changed location of `task_utils.py` and removed unused/prefect1-related tasks. - Changed the way of handling `NA` string values and mapped column types to `str` for `Sharepoint` source. - Added `SQLServerToDF` task - Added `SQLServerToDuckDB` flow which downloads data from SQLServer table, loads it to parquet file and then uploads it do DuckDB @@ -53,6 +68,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Removed Prefect dependency from the library (Python library, Docker base image) - Removed `catch_extra_separators()` from `SAPRFCV2` class +### Fixed + +- Fixed the typo in credentials in `SQLServer` source + ## [0.4.3] - 2022-04-28 ### Added diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ed8605c24..63951b332 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,17 +1,17 @@ -# How to contribute to `viadot` +# How to contribute to `viadot2` -## Setting up the environment +## Installation & set up -Follow the instructions in the [README](./README.md) to set up your development environment. +Follow instructions in the [documentation](./docs/getting_started.md) to set up your development environment. ### VSCode -We provide the extensions, settings, and tasks for VSCode in the `.vscode` folder. +For an enhanced experience, we provide the extensions, settings, and tasks for VSCode in the `.vscode` folder. -1. Install the extensions +1. Install recommended extensions ```console - cd .vscode && sh install_extensions.sh && cd .. + bash .vscode/install_extensions.sh ``` 2. Open the project in VSCode @@ -20,17 +20,9 @@ We provide the extensions, settings, and tasks for VSCode in the `.vscode` folde code . ``` -### Development Docker container +## Pre-commit hooks -If you wish to develop in a Docker container, viadot comes with a VSCode task to make that simple. You can easily spin up a terminal in the container with the `Ctrl+Shift+B` shortcut. The container will have all of the contents of the root `viadot` directory mapped to `/home/viadot`. - -### Environment variables - -To run tests, you may need to set up some environment variables or the viadot config. You can find all the required environment variables in the [tests' dotenv file](./tests/.env.example), and all the required viadot config settings in the [config file](./config.yaml.example). We're working on making this process easier, so only one of these can be used. - -### Pre-commit hooks - -We use pre-commit hooks to ensure that the code (as well as non-code text files, such as JSON, YAML, and Markdown files) is formatted and linted before committing. First, install `pre-commit`: +We use pre-commit hooks to ensure that the code as well as non-code text (such as JSON, YAML, and Markdown) is formatted and linted before committing. First, install `pre-commit`: ```console rye install pre-commit @@ -42,46 +34,69 @@ To install `viadot`'s pre-commit hooks, run the following command: pre-commit install ``` +## Running tests + +### Unit tests + +To run unit tests, simply execute: + +```console +pytest tests/unit +``` + +### Integration tests + +For integration tests, you need to set up some environment variables and viadot config. + +**NOTE**: Configs used internally within dyvenia are stored in our Passbolt instance. + +#### Environment variables + +You can find all used environment variables in the [tests' dotenv file](./tests/.env.example). The env file must be located at `tests/.env`. 
+ +#### Config file + +You can find all used viadot config entries in the [example config file](./config.yaml.example). + ## Style guidelines -- Code should be formatted and linted with [ruff](https://docs.astral.sh/ruff/) using default settings. The easiest way to accomplish this is to use the VSCode extension and the provided VSCode settings. Additionally, the pre-commit hook will take care of this, as well as formatting non-python files. -- Commit messages should: - - begin with an emoji - - start with one of the following verbs, capitalized, immediately after the summary emoji: "Add", "Update", "Remove", "Fix", "Rename", and, sporadically, other ones, such as "Upgrade", "Downgrade", or whatever you find relevant for your particular situation - - contain a useful summary of what the commit is doing - See [this article](https://www.freecodecamp.org/news/how-to-write-better-git-commit-messages/) to understand basics of naming commits +### Code style -## Submitting a PR +Code should be formatted and linted with [ruff](https://docs.astral.sh/ruff/). All settings are defined in the [pyproject file](pyproject.toml). -1. [Fork the repo](https://github.com/dyvenia/viadot/fork) -2. [Install](./README.md#installation) and [configure](./README.md#configuration) `viadot` +The easiest way to format your code as you go is to use the VSCode extension and the provided VSCode settings - your code will be automatically formatted and linted on each save, and the linter will highlight areas in your code which need fixing. - **Note**: In order to run tests, you will also need to install dev dependencies in the `viadot_2` container with `docker exec -u root -it viadot_2 sh -c "pip install -r requirements-dev.txt"` +Additionally, the pre-commit hook runs `ruff check`, so you can also wait till you commit to receive the formatting/linting feedback. -3. Checkout a new branch +### Commit messages style - ```console - git checkout -b - ``` +Commit messages should: - Make sure that your base branch is `2.0`! +- begin with an emoji (we recommend using the [gitmoji](https://marketplace.visualstudio.com/items?itemName=seatonjiang.gitmoji-vscode) VSCode extension (it's already included in viadot's `.vscode/extensions.list`)) +- start with one of the following verbs, capitalized, immediately after the summary emoji: "Add", "Update", "Remove", "Fix", "Rename", and, sporadically, other ones, such as "Upgrade", "Downgrade", or whatever you find relevant for your particular situation +- contain a useful summary of what the commit is doing -4. Add your changes + See [this article](https://www.freecodecamp.org/news/how-to-write-better-git-commit-messages/) to understand the basics of commit naming. - **Note**: See out Style Guidelines for more information about commit messages and PR names +## Submitting a PR -5. Test the changes locally +1. [Fork the repo](https://github.com/dyvenia/viadot/fork) +2. Uncheck the "Copy the `main` branch only" box +3. Follow the setup outlined above +4. Checkout a new branch ```console - docker exec -it viadot_2 sh -c "pytest" + # Make sure that your base branch is `2.0`. + git switch 2.0 && git checkout -b ``` +5. Add your changes 6. Sync your fork with the `dyvenia` repo ```console git remote add upstream https://github.com/dyvenia/viadot.git git fetch upstream 2.0 - git checkout 2.0 + git switch 2.0 git rebase upstream/2.0 ``` @@ -91,20 +106,54 @@ pre-commit install git push --force ``` -8. [Submit a PR](https://github.com/dyvenia/viadot/compare) into the `2.0` branch. +8. 
[Submit a PR](https://github.com/dyvenia/viadot/compare/2.0...main) into the `2.0` branch. + + Make sure to read & check all relevant checkboxes in the PR description! + +## Mainteners-only - Make sure to read & check all relevant checkboxes in the PR template! +### Releasing a new version -## Releasing a new version +#### Bump package version -In order to release a new version, either add a commit with a version bump to the last PR, or create a specific release PR. To bump the package version, simply run: +Before creating a release, either add a commit with a version bump to the last PR included in the release, or create a specific release PR. To bump package version, simply run: ```console -rye version x.y.z +rye version major.minor.patch ``` -Make sure to follow [semantic versioning](https://semver.org/). +This will update the version in `pyproject.toml` accordingly. + +**NOTE**: Make sure to follow [semantic versioning](https://semver.org/). + +#### Release + +Once the new version PR is merged to `main`, publish a version tag: + +```bash +viadot_version=v2.1.0 +git switch main && \ + git pull && \ + git tag -a $viadot_version -m "Release $viadot_version" && \ + git push origin $viadot_version +``` -The merge to `2.0` automatically publishes the `viadot:2.0-latest` image. +Pushing the tag will trigger the release workflow, which will: -If required, you can manually [deploy the package to PyPI](https://github.com/dyvenia/viadot/actions/workflows/publish_to_pypi.yml) or [publish the image with another tag](https://github.com/dyvenia/viadot/actions/workflows/docker-publish.yml) (such as a version tag). +- create a release on GitHub +- publish the package to PyPI +- publish Docker images to ghcr.io + +### Running actions + +You can execute actions manually with [GitHub CLI](https://cli.github.com/manual/): + +```console +gh workflow run .yml +``` + +If you need to pass parameters to the workflow, you can do so with the `--json` flag: + +```console +echo '{"name":"scully", "greeting":"hello"}' | gh workflow run workflow.yml --json +``` diff --git a/README.md b/README.md index 1ea84180c..f62f9876a 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,13 @@ # Viadot -[![build status](https://github.com/dyvenia/viadot/actions/workflows/build.yml/badge.svg)](https://github.com/dyvenia/viadot/actions/workflows/build.yml) -[![formatting](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -[![codecov](https://codecov.io/gh/Trymzet/dyvenia/branch/main/graph/badge.svg?token=k40ALkXbNq)](https://codecov.io/gh/Trymzet/dyvenia) +[![Rye](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/rye/main/artwork/badge.json)](https://rye.astral.sh) +[![formatting](https://img.shields.io/badge/style-ruff-41B5BE?style=flat)](https://img.shields.io/badge/style-ruff-41B5BE?style=flat) --- -**Documentation**: https://dyvenia.github.io/viadot/ +**Documentation**: https://viadot.docs.dyvenia.com -**Source Code**: https://github.com/dyvenia/viadot +**Source Code**: https://github.com/dyvenia/viadot/tree/2.0 --- @@ -35,15 +34,29 @@ print(df) The above `df` is a pandas `DataFrame` object. It contains data downloaded by `viadot` from the Carbon Intensity UK API. -## Loading Data to a Source +## Loading data to a destination -Depending on the source, `viadot` provides different methods of uploading data. For instance, for SQL sources, this would be bulk inserts. For data lake sources, it would be a file upload. 
For ready-made pipelines including data validation steps using `dbt`, see [prefect-viadot](https://github.com/dyvenia/prefect-viadot). +Depending on the destination, `viadot` provides different methods of uploading data. For instance, for databases, this would be bulk inserts. For data lakes, it would be file uploads. + +For example: + +```python hl_lines="2 8-9" +from viadot.sources import UKCarbonIntensity +from viadot.sources import AzureDataLake + +ukci = UKCarbonIntensity() +ukci.query("/intensity") +df = ukci.to_df() + +adls = AzureDataLake(config_key="my_adls_creds") +adls.from_df(df, "my_folder/my_file.parquet") +``` ## Getting started ### Prerequisites -We assume that you have [Rye](https://rye-up.com/) installed: +We use [Rye](https://rye-up.com/). You can install it like so: ```console curl -sSf https://rye-up.com/get | bash @@ -51,16 +64,16 @@ curl -sSf https://rye-up.com/get | bash ### Installation -Clone the `2.0` branch, and set up and run the environment: - ```console -git clone https://github.com/dyvenia/viadot.git -b 2.0 && \ - cd viadot && \ - rye sync +pip install viadot2 ``` ### Configuration In order to start using sources, you must configure them with required credentials. Credentials can be specified either in the viadot config file (by default, `$HOME/.config/viadot/config.yaml`), or passed directly to each source's `credentials` parameter. -You can find specific information about each source's credentials in [the documentation](https://dyvenia.github.io/viadot/references/sql_sources/). +You can find specific information about each source's credentials in [the documentation](https://viadot.docs.dyvenia.com/references/sources/sql_sources). + +### Next steps + +Check out the [documentation](https://viadot.docs.dyvenia.com) for more information on how to use `viadot`. 
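To make the `Configuration` step above more concrete, below is a minimal, illustrative sketch of a config file entry that would back the `config_key="my_adls_creds"` used in the earlier example. The credential fields are source-specific, so treat the `credentials` section as a placeholder and check the source reference docs for the exact keys:

```yaml
# $HOME/.config/viadot/config.yaml (illustrative sketch)
sources:
  - my_adls_creds:
      class: AzureDataLake
      credentials:
        # Fill in the credential fields required by AzureDataLake here
        # (see the source reference documentation for the exact keys).
```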
diff --git a/config.yaml.example b/config.yaml.example index 32c57434e..4b8c81b48 100644 --- a/config.yaml.example +++ b/config.yaml.example @@ -37,15 +37,15 @@ sources: region_name: "eu-west-1" - trino_dev: - class: Trino - credentials: - host: localhost - port: 8080 - user: "my_username" - password: "my_password" - catalog: "my_catalog" - http_scheme: "http" - verify: False + class: Trino + credentials: + host: localhost + port: 8080 + user: "my_username" + password: "my_password" + catalog: "my_catalog" + http_scheme: "http" + verify: False - minio_dev: class: MinIO @@ -56,3 +56,34 @@ sources: bucket: my_bucket secure: True verify: False + + - databricks_dev: + class: Databricks + credentials: + host: your_host + cluster_id: your_cluster_id + token: your_token + + - c4c_dev: + class: CloudForCustomers + credentials: + username: your_username + password: your_password + + - outlook_dev: + class: Outlook + credentials: + client_id: your_client_id + client_secret: your_client_secret + tenant_id: your_tenant_id + + - hubspot_dev: + class: Hubspot + credentials: + token: your_api_key + + - mindful_dev: + class: Mindful + credentials: + customer_uuid: your_customer_uuid + auth_token: your_token diff --git a/docker/Dockerfile b/docker/Dockerfile index 5c5acd577..c0351a093 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,7 @@ -FROM python:3.10-slim-bullseye +FROM python:3.10-slim-bullseye AS base + +ARG PIP_NO_CACHE_DIR=1 +ARG UV_NO_CACHE=true # Add user RUN useradd --non-unique --uid 1000 --create-home viadot && \ @@ -15,15 +18,34 @@ SHELL ["/bin/sh", "-c"] RUN groupadd docker && \ usermod -aG docker viadot -# Release File Error -# https://stackoverflow.com/questions/63526272/release-file-is-not-valid-yet-docker -RUN echo "Acquire::Check-Valid-Until \"false\";\nAcquire::Check-Date \"false\";" | cat > /etc/apt/apt.conf.d/10no--check-valid-until - # System packages -RUN apt update -q && yes | apt install -q gnupg vim unixodbc-dev build-essential \ - curl python3-dev libboost-all-dev libpq-dev python3-gi sudo git software-properties-common -ENV PIP_NO_CACHE_DIR=1 -RUN pip install --upgrade cffi +# unixodbc: used by pyodbc +# libgeos-dev: used by Shapely +# gcc: used by pandas +RUN apt update -q && yes | apt install -q gnupg vim curl git unixodbc libgeos-dev gcc + +# Turn off package manager caches. +ENV PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} +ENV UV_NO_CACHE=${UV_NO_CACHE} + +# This one's needed for the SAP RFC connector. +# It must be installed here as the `pyrfc` does not define its dependencies, +# so `pip install pyrfc` breaks if all deps are not already present. +RUN pip install cython==0.29.24 + +# Python env +RUN pip install --upgrade pip setuptools + + +ENV USER viadot +ENV HOME="/home/$USER" +ENV PATH="$HOME/.local/bin:$PATH" +ENV RYE_HOME="$HOME/.rye" +ENV PATH="$RYE_HOME/shims:$PATH" + +# Install Rye and uv. 
+RUN curl -sSf https://rye.astral.sh/get | RYE_TOOLCHAIN_VERSION="3.10" RYE_INSTALL_OPTION="--yes" bash && \ + rye config --set-bool behavior.use-uv=true # Fix for old SQL Servers still using TLS < 1.2 RUN chmod +rwx /usr/lib/ssl/openssl.cnf && \ @@ -34,12 +56,44 @@ RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ apt update -q && \ apt install -q libsqliteodbc && \ - ACCEPT_EULA=Y apt install -q -y msodbcsql17=17.8.1.1-1 && \ - ACCEPT_EULA=Y apt install -q -y mssql-tools && \ + ACCEPT_EULA=Y apt install -q -y msodbcsql17=17.10.1.1-1 && \ + ACCEPT_EULA=Y apt install -q -y mssql-tools=17.10.1.1-1 && \ echo 'export PATH="$PATH:/opt/mssql-tools/bin"' >> ~/.bashrc COPY docker/odbcinst.ini /etc +WORKDIR ${HOME} + +COPY --chown=${USER}:${USER} . ./viadot + +################### +### viadot-lite ### +################### + +FROM base as viadot-lite + +# Turn off package manager caches. +ENV PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} +ENV UV_NO_CACHE=${UV_NO_CACHE} + +RUN rye lock --reset --features viadot-lite --pyproject viadot/pyproject.toml +RUN sed '/-e/d' ./viadot/requirements.lock > ./viadot/requirements.txt +RUN pip install -r ./viadot/requirements.txt + +RUN pip install ./viadot/ + +# Cleanup. +RUN rm -rf ./viadot + +USER ${USER} + + +#################### +### viadot-azure ### +#################### + +FROM base as viadot-azure + ARG INSTALL_DATABRICKS=false # Databricks source setup @@ -56,30 +110,45 @@ RUN if [ "$INSTALL_DATABRICKS" = "true" ]; then \ ENV SPARK_HOME /usr/local/lib/python3.10/site-packages/pyspark -# This one's needed for the SAP RFC connector. -# It must be installed here as the SAP package does not define its dependencies, -# so `pip install pyrfc` breaks if all deps are not already present. -RUN pip install cython==0.29.24 - -# Python env -RUN pip install --upgrade pip - -ENV USER viadot -ENV HOME="/home/$USER" -ENV PATH="$HOME/.local/bin:$PATH" - -WORKDIR ${HOME} +ARG INSTALL_DATABRICKS=false -COPY --chown=${USER}:${USER} . ./viadot +# Turn off package manager caches. +ENV PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} +ENV UV_NO_CACHE=${UV_NO_CACHE} +RUN rye lock --reset --features azure --pyproject viadot/pyproject.toml +RUN sed '/-e/d' ./viadot/requirements.lock > ./viadot/requirements.txt +RUN pip install -r ./viadot/requirements.txt -# Dependecy install RUN if [ "$INSTALL_DATABRICKS" = "true" ]; then \ pip install ./viadot/.[databricks]; \ - else \ - pip install ./viadot; \ fi +# Dependecy install +RUN pip install ./viadot/.[azure] + +# Cleanup. +RUN rm -rf ./viadot + +USER ${USER} + + +################## +### viadot-aws ### +################## + +FROM base as viadot-aws + +# Turn off package manager caches. +ENV PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} +ENV UV_NO_CACHE=${UV_NO_CACHE} + +RUN rye lock --reset --features aws --pyproject viadot/pyproject.toml +RUN sed '/-e/d' ./viadot/requirements.lock > ./viadot/requirements.txt +RUN pip install -r ./viadot/requirements.txt + +RUN pip install ./viadot/.[aws] + # Cleanup. 
RUN rm -rf ./viadot diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index b9f6ee296..3ff4b0976 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,13 +1,33 @@ version: "3" services: - viadot_2: - image: ghcr.io/dyvenia/viadot/viadot:2.0-latest - container_name: viadot_2 + viadot-lite: + image: ghcr.io/dyvenia/viadot/viadot-lite:latest + container_name: viadot-lite volumes: # - ${HOME}/.databricks-connect:/home/viadot/.databricks-connect # - ${HOME}/.config/viadot/config.yaml:/home/viadot/.config/viadot/config.yaml - ../:/home/viadot shm_size: "4gb" - command: sleep infinity + entrypoint: sh ./docker/entrypoint.sh + restart: "unless-stopped" + viadot-azure: + image: ghcr.io/dyvenia/viadot/viadot-azure:latest + container_name: viadot-azure + volumes: + # - ${HOME}/.databricks-connect:/home/viadot/.databricks-connect + # - ${HOME}/.config/viadot/config.yaml:/home/viadot/.config/viadot/config.yaml + - ../:/home/viadot + shm_size: "4gb" + entrypoint: sh ./docker/entrypoint.sh + restart: "unless-stopped" + viadot-aws: + image: ghcr.io/dyvenia/viadot/viadot-aws:latest + container_name: viadot-aws + volumes: + # - ${HOME}/.databricks-connect:/home/viadot/.databricks-connect + # - ${HOME}/.config/viadot/config.yaml:/home/viadot/.config/viadot/config.yaml + - ../:/home/viadot + shm_size: "4gb" + entrypoint: sh ./docker/entrypoint.sh restart: "unless-stopped" diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100644 index 000000000..c561085ce --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +# Check whether dev requirements are already initialized. +if [ ! -f "./requirements-dev.txt" ]; then + echo "Installing dev dependencies..." + sed '/-e/d' requirements-dev.lock > requirements-dev.txt + pip install -r requirements-dev.txt --user + echo "Dev dependencies have been installed successfully!" +fi + +sleep infinity diff --git a/docker/odbcinst.ini b/docker/odbcinst.ini index 6594a39e7..3367ebf85 100644 --- a/docker/odbcinst.ini +++ b/docker/odbcinst.ini @@ -1,6 +1,6 @@ [ODBC Driver 17 for SQL Server] Description=Microsoft ODBC Driver 17 for SQL Server -Driver=/opt/microsoft/msodbcsql17/lib64/libmsodbcsql-17.8.so.1.1 +Driver=/opt/microsoft/msodbcsql17/lib64/libmsodbcsql-17.10.so.1.1 UsageCount=1 [SQLite] diff --git a/docs/advanced_usage/containerized_env.md b/docs/advanced_usage/containerized_env.md new file mode 100644 index 000000000..ff5810ba8 --- /dev/null +++ b/docs/advanced_usage/containerized_env.md @@ -0,0 +1,45 @@ +# Containerized development environment + +Currently, viadot ships with three images: + +- `viadot-lite` - includes the core `viadot2` library and system dependencies +- `viadot-azure` - includes `viadot2` with the `azure` extra, as well as Azure-related OS dependencies +- `viadot-aws` - includes `viadot2` with the `aws` extra, as well as AWS-related OS dependencies + +You can use these images to avoid installing any OS dependencies on your local machine (for example, `msodbcsql17` and `mssql-tools` used by the `SQLServer` source). + +## Setup + +Spin up your container of choice with `docker compose`: + +```bash +docker compose -f docker/docker-compose.yml up -d viadot- +``` + +For example, to start the `viadot-aws` container: + +```bash +docker compose -f docker/docker-compose.yml up -d viadot-aws +``` + +## Usage + +### Attaching to the container + +Once you have a container running, use an IDE like VSCode to attach to it. 
Alternatively, you can attach to the container using the CLI:
+
+```bash
+docker exec -it viadot-<extra> bash
+```
+
+### Building a custom image locally
+
+If you need to build a custom image locally, you can do so using standard Docker commands. For example:
+
+```bash
+docker build --target viadot-<extra> --tag viadot-<extra>:<tag> -f docker/Dockerfile .
+```
+
+### See also
+
+For more information on working with Docker containers and images, see [Docker documentation](https://docs.docker.com/reference/cli/docker/).
diff --git a/docs/advanced_usage/index.md b/docs/advanced_usage/index.md
new file mode 100644
index 000000000..d664e02d5
--- /dev/null
+++ b/docs/advanced_usage/index.md
@@ -0,0 +1,7 @@
+# Advanced usage guide
+
+In this guide, we'll describe some of the more advanced `viadot` concepts and workflows, such as:
+
+- working in a containerized environment
+- using secrets stores instead of the config file
+- etc.
diff --git a/docs/developer_guide/contributing_to_viadot.md b/docs/developer_guide/contributing_to_viadot.md
new file mode 100644
index 000000000..7146d3df5
--- /dev/null
+++ b/docs/developer_guide/contributing_to_viadot.md
@@ -0,0 +1,9 @@
+# Contributing to viadot
+
+## How to contribute
+
+Before creating a PR, make sure to familiarize yourself with the [Contributing Guide](https://github.com/dyvenia/viadot/blob/2.0/CONTRIBUTING.md).
+
+## Next steps
+
+To learn advanced `viadot` concepts and usage patterns, see the [advanced usage guide](../advanced_usage/index.md).
diff --git a/docs/developer_guide/creating_a_prefect_flow.md b/docs/developer_guide/creating_a_prefect_flow.md
new file mode 100644
index 000000000..a06984466
--- /dev/null
+++ b/docs/developer_guide/creating_a_prefect_flow.md
@@ -0,0 +1,165 @@
+# Creating jobs
+
+Let's assume that we've finished our `PostgreSQL` source. We now want to use it to automatically download data from PostgreSQL on a schedule.
+
+!!! note
+
+    Job scheduling is left outside of the scope of this guide - we only focus on creating the job itself.
+
+We create our ingestion job by utilizing [Prefect](https://www.prefect.io/) as our orchestration tool. We will create a Prefect flow, `postgresql_to_adls`. This flow will utilize our new connector to download a PostgreSQL table into a pandas `DataFrame`, and then upload the data to Azure Data Lake. The flow will consist of two tasks:
+
+- `postgresql_to_df` - downloads a PostgreSQL table into a pandas `DataFrame`
+- `df_to_adls` - uploads a pandas `DataFrame` to Azure Data Lake
+
+## Creating a task
+
+Below is an example task:
+
+```python
+# orchestration/prefect/tasks/postgresql.py
+
+from viadot.sources import PostgreSQL
+from prefect import task
+
+from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError
+from viadot.orchestration.prefect.utils import get_credentials
+
+import pandas as pd
+
+
+@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60)
+def postgresql_to_df(config_key: str | None = None, credentials_secret: str | None = None, ...) -> pd.DataFrame:
+    if not (credentials_secret or config_key):
+        raise MissingSourceCredentialsError
+
+    credentials = get_credentials(credentials_secret) if credentials_secret else None
+    postgres = PostgreSQL(credentials=credentials, config_key=config_key)
+    return postgres.to_df(...)
+```
+
+!!! info "Best practices"
+
+    1. The task should be a thin wrapper over a `viadot` function or method (ie. it shouldn't contain any logic but simply use relevant `viadot` functions and classes).
+    2. The task **MUST NOT** allow specifying the credentials directly. Credentials for the source must be passed via config key and/or secret. This is because Prefect stores the values of task parameters and sometimes even logs them in the UI, which means that passing the credentials directly creates a security risk.
+    3. Validate the credentials and raise `MissingSourceCredentialsError` if needed.
+    4. Utilize retries and timeouts to make the task and the entire system more robust.
+
+When you are done with the task, remember to import it in `tasks/__init__.py`, so that it can be imported with `from viadot.orchestration.prefect.tasks import postgresql_to_df` (instead of `from viadot.orchestration.prefect.tasks.postgresql import postgresql_to_df`):
+
+```python
+# orchestration/prefect/tasks/__init__.py
+
+from .postgresql import postgresql_to_df  # noqa: F401
+```
+
+!!! note "Tests"
+
+    Note that since the orchestration layer is only a thin wrapper around `viadot` sources, we don't require unit or integration tests for tasks or flows. Instead, add all your unit and integration tests at the source connector level.
+
+## Tasks using optional dependencies
+
+Similar to sources, in case your task uses an optional dependency, it has to be escaped:
+
+```python
+# orchestration/prefect/tasks/adls.py
+
+"""Tasks for interacting with Azure Data Lake (gen2)."""
+
+import contextlib
+...
+
+with contextlib.suppress(ImportError):
+    from viadot.sources import AzureDataLake
+
+...
+```
+
+In case you're adding task/flow tests, remember to also escape the imports with `viadot.utils.skip_test_on_missing_extra()`!
+
+## Creating a Prefect flow
+
+Once the tasks are ready, the last step of our development is to define the flow:
+
+```python
+# orchestration/prefect/flows/postgresql_to_adls.py
+
+from viadot.orchestration.prefect.tasks import df_to_adls, postgresql_to_df
+
+from prefect import flow
+
+@flow(
+    name="extract--postgresql--adls",
+    description="Extract data from PostgreSQL database and load it into Azure Data Lake.",
+    retries=1,
+    retry_delay_seconds=60,
+    timeout_seconds=60*60,
+)
+def postgresql_to_adls(
+    adls_path: str,
+    adls_credentials_secret: str | None = None,
+    adls_config_key: str | None = None,
+    overwrite: bool = False,
+    postgresql_config_key: str | None = None,
+    postgresql_credentials_secret: str | None = None,
+    sql_query: str | None = None,
+    ) -> None:
+
+    df = postgresql_to_df(
+        credentials_secret=postgresql_credentials_secret,
+        config_key=postgresql_config_key,
+        sql_query=sql_query,
+    )
+    return df_to_adls(
+        df=df,
+        path=adls_path,
+        credentials_secret=adls_credentials_secret,
+        config_key=adls_config_key,
+        overwrite=overwrite,
+    )
+
+```
+
+!!! info "Best practices"
+
+    1. The flow should be a thin wrapper over the tasks, and should contain minimal logic. If your flow is getting too complex, it means that you're probably working around the limitations of `viadot`. Instead of adding workarounds in the flow, simply add the missing functionality to the connector you're using. This will make the functionality easier to test. It will also make it reusable across different orchestrators (eg. Airflow).
+    2. Utilize retries and timeouts to make the flow and the entire system more robust*. 
+ + *if you do use retries, make sure the flow is [idempotent](https://airbyte.com/data-engineering-resources/idempotency-in-data-pipelines) + +When you are done with the flow, remember to import it in the init file, so that it can be imported with `from viadot.orchestration.prefect.flows import postgresql_to_adls` (instead of `from viadot.orchestration.prefect.flows.postgresql_to_adls import postgresql_to_adls`): + +```python +# orchestration/prefect/flows/__init__.py + +... +from .postgresql_to_adls import postgresql_to_adls + +__all__ = [ + ..., + "postgresql_to_adls" +] +``` + +## Adding docs + +To allow MkDocs to autogenerate and display documentation for your tasks and flows in [reference docs](../references/orchestration/prefect/tasks.md), add relevant entries in the reference docs (`docs/references/orchestration/prefect`). For example: + +Task: + +```markdown +# docs/references/orchestration/prefect/tasks.md + +... + +::: viadot.orchestration.prefect.tasks.postgresql_to_df +``` + +Flow: + +```markdown +# docs/references/orchestration/prefect/flows.md + +... + +::: viadot.orchestration.prefect.flows.postgresql_to_adls +``` diff --git a/docs/developer_guide/creating_a_source.md b/docs/developer_guide/creating_a_source.md new file mode 100644 index 000000000..46bd305e2 --- /dev/null +++ b/docs/developer_guide/creating_a_source.md @@ -0,0 +1,321 @@ +# Creating a source connector + +## Example + +The first thing you need to do is create a class that inherits from the `SQL` class. You should also specify a [pydantic](https://medium.com/mlearning-ai/improve-your-data-models-with-pydantic-f9f10ca66f26) model for the source's credentials: + +```python +# sources/postgresql.py + +"""PostgreSQL connector.""" + +from viadot.sources.base import SQL +from pydantic import BaseModel + +class PostgreSQLCredentials(BaseModel): + host: str + port: int = 5432 + database: str + user: str + password: str + +class PostgreSQL(SQL): + + def __init__( + self, + credentials: PostgreSQLCredentials | None = None, + config_key: str | None = None, + *args, + **kwargs, + ): + ... +``` + +Credentials can now be provided directly via the `credentials` parameter or by [using the config key](#using-viadot-config). + +!!! warning "viadot metadata - hardcoded schemas workaround" + + The addition of viadot metadata columns (currently, `_viadot_source` and `_viadot_downloaded_at_utc`) should be done in the base class's `to_df()` method. However, due to some production uses of viadot relying on hardcoded DataFrame schemas (and not being able to either pin the `viadot` version or fix the hardcoding), this cannot currently be done. As a workaround, you need to implement the `to_df()` method in your source and add the columns yourself. + + Below is a an example for our Postgres connector. Since we can reuse the parent class's `to_df()` method, we're simply wrapping it with the `add_viadot_metadata_columns` decorator: + + ```python hl_lines="27-35" + # sources/postgresql.py + + """PostgreSQL connector.""" + + from viadot.sources.base import SQL + from viadot.utils import add_viadot_metadata_columns + from pydantic import BaseModel + + class PostgreSQLCredentials(BaseModel): + host: str + port: int = 5432 + database: str + user: str + password: str + + class PostgreSQL(SQL): + + def __init__( + self, + credentials: PostgreSQLCredentials | None = None, + config_key: str | None = None, + *args, + **kwargs, + ): + ... 
+ + @add_viadot_metadata_columns + def to_df( + self, + query: str, + con: pyodbc.Connection | None = None, + if_empty: Literal["warn", "skip", "fail"] = "warn", + ) -> pd.DataFrame: + """Execute a query and return the result as a pandas DataFrame.""" + super().to_df() + ``` + + For more information, see [this issue](https://github.com/dyvenia/viadot/issues/737). + +Now, we also need to add a way to pass the credentials to the parent class: + +```python hl_lines="7 32-36" +# sources/postgresql.py + +"""PostgreSQL connector.""" + +from viadot.sources.base import SQL +from viadot.utils import add_viadot_metadata_columns +from viadot.config import get_source_credentials +from pydantic import BaseModel + +class PostgreSQLCredentials(BaseModel): + host: str + port: int = 5432 + database: str + user: str + password: str + +class PostgreSQL(SQL): + + def __init__( + self, + credentials: PostgreSQLCredentials | None = None, + config_key: str | None = None, + *args, + **kwargs, + ): + """A PostgreSQL connector. + + Args: + credentials (PostgreSQLCredentials, optional): Database credentials. + config_key (str, optional): The key in the viadot config holding relevant credentials. + """ + raw_creds = credentials or get_source_credentials(config_key) or {} + validated_creds = PostgreSQLCredentials(**raw_creds).dict( + by_alias=True + ) + super().__init__(*args, credentials=validated_creds, **kwargs) + + @add_viadot_metadata_columns + def to_df( + self, + query: str, + con: pyodbc.Connection | None = None, + if_empty: Literal["warn", "skip", "fail"] = "warn", + ) -> pd.DataFrame: + """Execute a query and return the result as a pandas DataFrame.""" + super().to_df() +``` + +Once you're done with the source, remember to import it in `sources/__init__.py`, so that it can be imported with `from viadot.sources import PostgreSQL` (instead of `from viadot.sources.postgresql import PostgreSQL`): + +```python +# sources/__init__.py + +from .postgresql import PostgreSQL + +__all__ = [ + ..., + "PostgreSQL" +] +``` + +## Sources using optional dependencies + +In case your source uses an [optional dependency](https://github.com/dyvenia/viadot/blob/2.0/pyproject.toml), you need to escape the import. In the example below, our source uses the optional `adlfs` package (part of the `azure` extra): + +```python hl_lines="5-8" +# sources/azure_data_lake.py + +from viadot.sources.base import Source + +try: + from adlfs import AzureBlobFileSystem, AzureDatalakeFileSystem +except ModuleNotFoundError as e: + msg = "Missing required modules to use AzureDataLake source." + raise ImportError(msg) from e + + +class AzureDataLake(Source): + ... +``` + +The import in `sources/__init__.py` also needs to be guarded: + +```python hl_lines="13-16" +# sources/__init__.py + +from importlib.util import find_spec + +from .cloud_for_customers import CloudForCustomers +... + +__all__ = [ + "CloudForCustomers", + ... +] + +if find_spec("adlfs"): + from viadot.sources.azure_data_lake import AzureDataLake # noqa: F401 + + __all__.extend(["AzureDataLake"]) +``` + +## Adding docs + +To allow MkDocs to autogenerate and display documentation for your source in [reference docs](../references/sources/sql_sources.md), add an entry in the reference docs (`docs/references/sources`). For example: + +```markdown +# docs/references/sources/sql_sources.md + +... + +::: viadot.sources.postgresql.PostgreSQL +``` + +## Adding tests + +Make sure to add tests for your source! 
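For instance, if the unit tests for the hypothetical `PostgreSQL` connector from this guide lived in `tests/unit/test_postgresql.py` (an illustrative path), you could run just that file with:

```console
pytest tests/unit/test_postgresql.py
```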
+ +### Unit + +You can think of unit tests as tests which do not require internet connection or connectivity to the actual data source or destination. All unit tests are executed automatically on each PR to `viadot`'s default branch. + +A common practice to ensure above requirements are met is to mock the external systems. For example, if we wish to create a unit test for our `Sharepoint` source which will test the `to_df()` method, which in turn depends on the `_download_excel()` method, we must first mock the `_download_excel()` method so that it doesn't actually try to download any data. Below is an example of how you can accomplish this: + +```python +# tests/unit/test_sharepoint.py + +import pandas as pd +from viadot.sources import Sharepoint + +TEST_CREDENTIALS = {"site": "test", "username": "test2", "password": "test"} + +class SharepointMock(Sharepoint): + def _download_excel(self, url=None): + """Returns a test DataFrame instead of calling a Sharepoint server.""" + return pd.ExcelFile(Path("tests/unit/test_file.xlsx")) + +def test_sharepoint(): + s = SharepointMock(credentials=TEST_CREDENTIALS) + df = s.to_df(url="test") + + assert not df.empty +``` + +### Integration + +Integration tests connect to the actual systems. For these tests, you will need to set up your viadot config with proper credentials. For example, to test a `Sharepoint` source, our config could look like this: + +```yaml +# ~/.config/viadot/config.yaml +version: 1 + +sources: + - sharepoint_dev: + class: Sharepoint + credentials: + site: "site.sharepoint.com" + username: "test_username" + password: "test_password" +``` + +Then, in our integration tests, we can use the `Sharepoint` source with the `sharepoint_dev` config key: + +```python +# tests/integration/test_sharepoint.py + +import pytest +... + +@pytest.fixture +def sharepoint(): + from viadot.sources import Sharepoint + + return Sharepoint(config_key="sharepoint_dev") +``` + +!!! info + + For more information on viadot config, see [this page](../developer_guide/config_key.md). + +### Optional dependencies + +Same as with the source, make sure to escape the imports of optional dependencies: + +```python +from viadot.utils import skip_test_on_missing_extra +... + +try: + from viadot.sources import AzureDataLake + +except ImportError: + skip_test_on_missing_extra(source_name="AzureDataLake", extra="azure") +``` + +## Using viadot config + +In order to avoid storing and passing credentials through variables, source configuration should be stored in the viadot config file (by default, `~/.config/viadot/config.yaml`). + +You can find each source's configuration in [the documentation](../references/sources/sql_sources.md). + +Below is an example config file, with configurations for two sources: + +```yaml +sources: + - exchange_rates: + class: ExchangeRates + credentials: + api_key: "api123api123api123" + + - sharepoint: + class: Sharepoint + credentials: + site: "site.sharepoint.com" + username: "user@email.com" + password: "password" +``` + +In the above, `exchange_rates` and `sharepoint` are what we refer to as "config keys". For example, this is how to use the `exchange_rates` config key to pass credentials to the `ExchangeRates` source: + +```python +# sources/exchange_rates.py + +source = ExchangeRates(config_key="exchange_rates") +``` + +This will pass the `credentials` key, including the `api_key` secret, to the instance. + +!!! info + + You can use any name for your config key, as long as it's unique. 
For example, we can have credentials for two different environments stored as `sharepoint_dev` and `sharepoint_prod` keys. + +## Conclusion + +And that's all you need to know to create your own `viadot` connectors! + +If you need inspiration, take a look at some of the [existing sources](https://github.com/dyvenia/viadot/blob/2.0/src/viadot/sources/). diff --git a/docs/developer_guide/index.md b/docs/developer_guide/index.md new file mode 100644 index 000000000..241df4748 --- /dev/null +++ b/docs/developer_guide/index.md @@ -0,0 +1,5 @@ +# Developer guide + +In this guide, we're going to be developing a new source and data orchestration job. + +We'll start by creating a `PostgreSQL` source connector. Then, we'll create a Prefect flow that downloads a PostgreSQL table into a pandas `DataFrame` and uploads the data as a Parquet file to Azure Data Lake. diff --git a/docs/getting_started.md b/docs/getting_started.md new file mode 100644 index 000000000..2d4b1dd7b --- /dev/null +++ b/docs/getting_started.md @@ -0,0 +1,102 @@ +# Getting started guide + +## Prerequisites + +We use [Rye](https://rye-up.com/). You can install it like so: + +```console +curl -sSf https://rye-up.com/get | bash +``` + +## Installation + +!!! note + + `viadot2` installation requires installing some Linux libraries, which may be complex for less technical users. For those users, we recommend using the [viadot container](./advanced_usage/containerized_env.md). + +### OS dependencies + +#### Core + +`viadot2` depends on some Linux system libraries. You can install them in the following way: + +```console +sudo apt update -q && \ + yes | apt install -q gnupg unixodbc && \ + curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ + curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ + sudo apt update -q && \ + sudo apt install -q libsqliteodbc && \ + ACCEPT_EULA=Y apt install -q -y msodbcsql17=17.10.1.1-1 && \ + ACCEPT_EULA=Y apt install -q -y mssql-tools=17.10.1.1-1 && \ + echo 'export PATH="$PATH:/opt/mssql-tools/bin"' >> ~/.bashrc +``` + +Next, copy the SQL Server config from `docker/odbcinst.ini` file into your `/etc/odbcinst.ini` file. + +```console +cat docker/odbcinst.ini | sudo tee -a /etc/odbcinst.ini +``` + +#### SAP connector + +In order to work with the SAP connector, you must also install the [SAP NetWeaver RFC SDK](https://support.sap.com/en/product/connectors/nwrfcsdk.html). You must have a SAP license in order to be able to download and use the driver. + +To install the driver, copy the `nwrfcsdk` folder to `/usr/local/sap/`. Then, also copy the `nwrfcsdk.conf` file to `/etc/ld.so.conf.d/`: + +```console +sudo cp -R ./sap_netweaver_rfc/ /usr/local/sap/ +sudo cp ./sap_netweaver_rfc/nwrfcsdk.conf /etc/ld.so.conf.d/ +``` + +Next, configure the `SAPNWRFC_HOME` env variable, eg. by adding the following entry in your `~/.bashrc` file: + +```bash +export SAPNWRFC_HOME=/usr/local/sap/nwrfcsdk +``` + +Make sure to reload the shell: + +```console +source ~/.bashrc +``` + +Next, configure the RFC driver with: + +```console +sudo ldconfig +``` + +Finally, you can install `pyrfc` by installing the viadot `sap` extra: + +```console +rye sync --features=sap +``` + +### Library + +```console +git clone https://github.com/dyvenia/viadot.git -b 2.0 && \ + cd viadot && \ + rye sync +``` + +!!! note + + Since `viadot` does not have an SDK, both adding new sources and flows requires **contributing your code to the library**. 
Hence, we install the library from source instead of just using `pip install`. However, installing `viadot2` with `pip install` is still possible: + + ```console + pip install viadot2 + ``` + + or, with the `azure` [extra](https://github.com/dyvenia/viadot/blob/2.0/pyproject.toml) as an example: + + ```console + pip install viadot2[azure] + ``` + + Note that the system dependencies **are not** installed via `pip` and must be installed separately a package manager such as `apt`. + +## Next steps + +Head over to the [developer guide](./developer_guide/index.md) to learn how to use `viadot` to build data connectors and jobs. diff --git a/docs/howtos/config_file.md b/docs/howtos/config_file.md deleted file mode 100644 index 06b5259f6..000000000 --- a/docs/howtos/config_file.md +++ /dev/null @@ -1,42 +0,0 @@ -# Config File - -Credentials and other settings for various **sources** are stored in a file named `credentials.json`. A credential file needs to be written in json format. A typical credentials file looks like so: - -```json -{ - "SUPERMETRICS": { - "API_KEY": "apikey from supermetrics", - "USER": "user@gmail.com", - "SOURCES": { - "Google Ads": { - "Accounts": ["456"] - } - } - }, - "AZURE_SQL": { - "server": "server url", - "db_name": "db name", - "user": "user", - "password": "db password", - "driver": "driver" - } -} -``` - -In the above **SUPERMETRICS** and **AZURE_SQL** are config_keys. These config settings are fed to the `Supermetrics()` or `AzureSQL()` Sources. - -For example, this is how to use the **AZURE_SQL** configuration stanza from the credentials file. - -```python -# initiates the AzureSQL class with the AZURE_SQL configs -azure_sql = AzureSQL(config_key="AZURE_SQL") -``` - -The above will pass all the configurations, including secrets like passwords, to the class. This avoids having to write secrets or configs in the code. - -## Storing the file locally - -Currently only local files are supported. Make sure to store the file in the correct path. - -- On Linux the path is `/home/viadot/.config/credentials.json` -- On Windows you need to create a `.config` folder with `credentials.json` inside the User folder `C:\Users\` diff --git a/docs/howtos/howto_migrate_sources_tasks_and_flows.md b/docs/howtos/howto_migrate_sources_tasks_and_flows.md deleted file mode 100644 index 5338687c5..000000000 --- a/docs/howtos/howto_migrate_sources_tasks_and_flows.md +++ /dev/null @@ -1,52 +0,0 @@ -# viadot sources, tasks & flows migration workflow - -This guide aims to assist the developers in migrating the sources, tasks and flows from viadot 1.0 to their respective new repos/branches. - -## 1. Migrating a source from viadot 1.0 to viadot 2.0 - -The process involves refactoring and modifying the existing sources in viadot 1.0 to work properly on [viadot 2.0](https://github.com/dyvenia/viadot/tree/2.0). This process include but are not limited to the following steps: - -### a. Decoupling Prefect from the source - -One of the aims of the migration is to completely decouple viadot 2.0 from prefect. Instead of having prefect bundled in with viadot, we have removed the prefect portion of viadot and moved it to another repo called [prefect-viadot](https://github.com/dyvenia/prefect-viadot/). -This includes but not limited to the following actions: - -#### 1. Removing `prefect` imports and replacing them if needed - -#### 2. Properly replacing any uses of `prefect.signals` with `viadot.signals` or native Python error handling - -#### 3. Removing unused/unnecessary imports - -#### 4. 
Replacing the `prefect` logger with the regular Python logger - -In short, your aim is to remove the Prefect components completely, while making sure the source works correctly. - -### Modify the credentials logic to use pydantic models - -Here's a [guide](https://medium.com/mlearning-ai/improve-your-data-models-with-pydantic-f9f10ca66f26) on how to implement pydantic models. Alternatively, you can also take a look at any of the sources already migrated to [viadot 2.0](https://github.com/dyvenia/viadot/tree/2.0/viadot/sources) as a reference. - -### Allow the source to take credentials from `config.yml` or `credentials.json` via a `config_key` parameter passed on to the source. - -This can be a dictionary of credentials that the source can use. - -### b. Migrating current tests - -The tests that are still valid and applicable should be migrated from viadot 1.0 to viadot 2.0. This might include modifying the existing tests. - -### c. Improving the test coverage - -It is possibe that the test coverage of the existing tests is not sufficient. Please ensure that the test coverage is high enough (~80%) and create new tests if needed. - -## 2. Migrating the task(s) & flow(s) related to the source from viadot 1.0 to prefect-viadot - -After the source is migrated and tested successfully, the next step would be to migrate any related flows to [prefect-viadot](https://github.com/dyvenia/prefect-viadot/). Generally speaking, the steps involved with this phase are very similar to the ones in the source migration section, so you can follow the same steps listed there. There might be some cases where some steps are not applicable (ex. some flows don't take any credentials). In these cases, you may skip whatever is not applicable. - -## 3. Creating PRs - -### a. Creating a PR to viadot 2.0 for the migrated sources - -Please create a PR on the [viadot repo](https://github.com/dyvenia/viadot), where the PR should merge into the `2.0` branch. - -### b. Creating a PR to prefect-viadot for the migrated tasks & flows - -Please create a PR on the [prefect-viadot repo](https://github.com/dyvenia/prefect-viadot), where the PR should merge into the `main` branch. diff --git a/docs/index.md b/docs/index.md index 9a9a551d2..0e98e4e72 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,34 +1,19 @@ # Viadot -[![build status](https://github.com/dyvenia/viadot/actions/workflows/build.yml/badge.svg)](https://github.com/dyvenia/viadot/actions/workflows/build.yml) -[![formatting](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -[![codecov](https://codecov.io/gh/Trymzet/dyvenia/branch/main/graph/badge.svg?token=k40ALkXbNq)](https://codecov.io/gh/Trymzet/dyvenia) - ---- - -**Documentation**: https://dyvenia.github.io/viadot/ - -**Source Code**: https://github.com/dyvenia/viadot - ---- - A simple data ingestion library to guide data flows from some places to other places. -## Structure - -This documentation is following the diΓ‘taxis framework. - -## Getting Data from a Source +## Getting data from a source -Viadot supports several API and RDBMS sources, private and public. Currently, we support the UK Carbon Intensity public API and base the examples on it. +Viadot supports several API and database sources, private and public. 
Below is a snippet of how to get data from the UK Carbon Intensity API: ```python -from viadot.sources.uk_carbon_intensity import UKCarbonIntensity +from viadot.sources import UKCarbonIntensity ukci = UKCarbonIntensity() ukci.query("/intensity") df = ukci.to_df() -df + +print(df) ``` **Output:** @@ -37,62 +22,26 @@ df | --: | ----------------- | :---------------- | -------: | -----: | :------- | | 0 | 2021-08-10T11:00Z | 2021-08-10T11:30Z | 211 | 216 | moderate | -The above `df` is a python pandas `DataFrame` object. The above df contains data downloaded from viadot from the Carbon Intensity UK API. - -## Loading Data to a Source - -Depending on the source, viadot provides different methods of uploading data. For instance, for SQL sources, this would be bulk inserts. For data lake sources, it would be a file upload. We also provide ready-made pipelines including data validation steps using Great Expectations. - -An example of loading data into SQLite from a pandas `DataFrame` using the `SQLiteInsert` Prefect task: - -```python -from viadot.tasks import SQLiteInsert - -insert_task = SQLiteInsert() -insert_task.run(table_name=TABLE_NAME, dtypes=dtypes, db_path=database_path, df=df, if_exists="replace") -``` - -## Running tests - -To run tests, log into the container and run pytest: - -``` -cd viadot/docker -run.sh -docker exec -it viadot_testing bash -pytest -``` - -## Running flows locally +The above `df` is a pandas `DataFrame` object. It contains data downloaded by `viadot` from the Carbon Intensity UK API. -You can run the example flows from the terminal: +## Loading data to a destination -``` -run.sh -docker exec -it viadot_testing bash -FLOW_NAME=hello_world; python -m viadot.examples.$FLOW_NAME -``` +Depending on the destination, `viadot` provides different methods of uploading data. For instance, for databases, this would be bulk inserts. For data lakes, it would be file uploads. -However, when developing, the easiest way is to use the provided Jupyter Lab container available at `http://localhost:9000/`. +For example: -## How to contribute +```python hl_lines="2 8-9" +from viadot.sources import UKCarbonIntensity +from viadot.sources import AzureDataLake -1. Clone the release branch -2. Pull the docker env by running `viadot/docker/update.sh -t dev` -3. Run the env with `viadot/docker/run.sh` -4. Log into the dev container and install in development mode so that viadot will auto-install at each code change: +ukci = UKCarbonIntensity() +ukci.query("/intensity") +df = ukci.to_df() +adls = AzureDataLake(config_key="my_adls_creds") +adls.from_df(df, "my_folder/my_file.parquet") ``` -docker exec -it viadot_testing bash -pip install -e . -``` - -5. Edit and test your changes with `pytest` -6. Submit a PR. The PR should contain the following: -- new/changed functionality -- tests for the changes -- changes added to `CHANGELOG.md` -- any other relevant resources updated (esp. `viadot/docs`) +## Next steps -Please follow the standards and best practices used within the library (eg. when adding tasks, see how other tasks are constructed, etc.). For any questions, please reach out to us here on GitHub. +Head over to the [Getting Started](./getting_started/getting_started.md) guide to learn how to set up `viadot`. 
diff --git a/docs/references/api_sources.md b/docs/references/api_sources.md deleted file mode 100644 index 08a87978e..000000000 --- a/docs/references/api_sources.md +++ /dev/null @@ -1,5 +0,0 @@ -# API Sources - -::: viadot.sources.uk_carbon_intensity.UKCarbonIntensity - -::: viadot.sources.cloud_for_customers.CloudForCustomers diff --git a/docs/references/orchestration/prefect/flows.md b/docs/references/orchestration/prefect/flows.md new file mode 100644 index 000000000..03cc53e78 --- /dev/null +++ b/docs/references/orchestration/prefect/flows.md @@ -0,0 +1,19 @@ +::: viadot.orchestration.prefect.flows.cloud_for_customers_to_adls + +::: viadot.orchestration.prefect.flows.cloud_for_customers_to_databricks + +::: viadot.orchestration.prefect.flows.exchange_rates_to_adls + +::: viadot.orchestration.prefect.flows.sap_to_redshift_spectrum + +::: viadot.orchestration.prefect.flows.sharepoint_to_adls + +::: viadot.orchestration.prefect.flows.sharepoint_to_databricks + +::: viadot.orchestration.prefect.flows.sharepoint_to_redshift_spectrum + +::: viadot.orchestration.prefect.flows.sharepoint_to_s3 + +::: viadot.orchestration.prefect.flows.transform + +::: viadot.orchestration.prefect.flows.transform_and_catalog diff --git a/docs/references/orchestration/prefect/tasks.md b/docs/references/orchestration/prefect/tasks.md new file mode 100644 index 000000000..78ebf516b --- /dev/null +++ b/docs/references/orchestration/prefect/tasks.md @@ -0,0 +1,23 @@ +::: viadot.orchestration.prefect.tasks.adls_upload + +::: viadot.orchestration.prefect.tasks.df_to_adls + +::: viadot.orchestration.prefect.tasks.cloud_for_customers_to_df + +::: viadot.orchestration.prefect.tasks.df_to_databricks + +::: viadot.orchestration.prefect.tasks.dbt_task + +::: viadot.orchestration.prefect.tasks.exchange_rates_to_df + +::: viadot.orchestration.prefect.tasks.clone_repo + +::: viadot.orchestration.prefect.tasks.luma_ingest_task + +::: viadot.orchestration.prefect.tasks.df_to_redshift_spectrum + +::: viadot.orchestration.prefect.tasks.s3_upload_file + +::: viadot.orchestration.prefect.tasks.sharepoint_download_file + +::: viadot.orchestration.prefect.tasks.sharepoint_to_df diff --git a/docs/references/sources/api_sources.md b/docs/references/sources/api_sources.md new file mode 100644 index 000000000..91e64177c --- /dev/null +++ b/docs/references/sources/api_sources.md @@ -0,0 +1,17 @@ +# API Sources + +::: viadot.sources.uk_carbon_intensity.UKCarbonIntensity + +::: viadot.sources.cloud_for_customers.CloudForCustomers + +::: viadot.sources.exchange_rates.ExchangeRates + +::: viadot.sources.cloud_for_customers.CloudForCustomers + +::: viadot.sources.sap_rfc.SAPRFC + +::: viadot.sources.sharepoint.Sharepoint + +::: viadot.sources.genesys.Genesys + +::: viadot.sources.minio.MinIO diff --git a/docs/references/sources/sql_sources.md b/docs/references/sources/sql_sources.md new file mode 100644 index 000000000..f2f57ebb1 --- /dev/null +++ b/docs/references/sources/sql_sources.md @@ -0,0 +1,19 @@ +# SQL Sources + +::: viadot.sources.base.Source + +::: viadot.sources.base.SQL + +::: viadot.sources.azure_data_lake.AzureDataLake + +::: viadot.sources.redshift_spectrum.RedshiftSpectrum + +::: viadot.sources.s3.S3 + +::: viadot.sources.sqlite.SQLite + +::: viadot.sources.sql_server.SQLServer + +::: viadot.sources.databricks.Databricks + +::: viadot.sources.trino.Trino diff --git a/docs/references/sql_sources.md b/docs/references/sql_sources.md deleted file mode 100644 index 14b3aaba0..000000000 --- a/docs/references/sql_sources.md +++ 
/dev/null @@ -1,8 +0,0 @@ -# SQL Sources - -::: viadot.sources.base.Source -::: viadot.sources.base.SQL - -::: viadot.sources.azure_data_lake.AzureDataLake - -::: viadot.sources.sqlite.SQLite diff --git a/docs/tutorials/adding_source.md b/docs/tutorials/adding_source.md deleted file mode 100644 index ac6cf9599..000000000 --- a/docs/tutorials/adding_source.md +++ /dev/null @@ -1,13 +0,0 @@ -# Adding a source - -## 1. Add a source - -To add a source, create a new file in `viadot/sources`. The source must inherit from the `Source` base class and accept a `credentials` parameter. - -## 2. Add a task - -Within the task, you should handle the authentication to the source. For this, utilize either a Prefect secret or the Azure Key Vault secret. See existing tasks, eg. `AzureDataLakeDownload`, for reference. Note that we sometimes also provide a default value for the secret name which is stored as a Prefect secret itself. This is so that you can safely publish your flow code in the "infrastructure as code" spirit, without revealing the names of the actual keys used in your vault. You can instead only provide the name of the Prefect secret holding the actual name. These defaults can be configured in your local Prefect config (`.prefect/config.toml`) or in Prefect cloud. For example, let's say you have a secret set by another department in your organization called `my_service_principal`, storing the credentials of the service account used to authenticate to the data lake. Let's assume the name of this service account should be protected. With the implementation used eg. in `AzureDataLakeDownload`, you can create Prefect secret called eg. `my_service_account_1_name` and only refer to this secret in your flow, eg. in this task, by setting `sp_credentials_secret` to `my_service_account_1_name`. - -## 3. Integrate into a flow - -Now you can finally integrate the source into a full flow. See [Adding a flow](../tutorials/adding_flow.md) diff --git a/docs/tutorials/databricks.md b/docs/tutorials/databricks.md deleted file mode 100644 index 3932d33dd..000000000 --- a/docs/tutorials/databricks.md +++ /dev/null @@ -1,5 +0,0 @@ -# Databricks - -## Credentials - -In order to retrieve values for the credentials, follow step 2 [here](https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect). diff --git a/docs/tutorials/supermetrics.md b/docs/tutorials/supermetrics.md deleted file mode 100644 index 41542321c..000000000 --- a/docs/tutorials/supermetrics.md +++ /dev/null @@ -1,13 +0,0 @@ -# How to pull Supermetrics data - -With `viadot` you have the opportunity to pull data from Supermetrics API and save it in parquet format on the Azure Data Lake. You can also load this data do Azure SQL Database. - -If you need more info about Supermetrics API, please visit https://supermetrics.com/docs/product-api-getting-started/ - -## Pull data from Supermetrics - -Let's pull data from Supermetrics and save output as a parquet file on Azure Data Lake. - -To pull data from Supermetrics we will create flow basing on `SupermetricsToADLS`. - -Data types are automatically detected and mapped to meet Microsoft Azure SQL Database requirements. 
Schema json will be stored in the data lake (parquet_file_directory/schema) diff --git a/mkdocs.yml b/mkdocs.yml index 769e2c0b6..1541d743e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,18 +6,27 @@ edit_uri: blob/main/docs/ nav: - Home: index.md - - Howtos: - - Configuration: howtos/config_file.md + - Getting Started: + - Getting Started: getting_started.md - - Tutorials: - - Adding a new source: tutorials/adding_source.md - - Databricks credentials: tutorials/databricks.md - - Scheduling data ingestion from Supermetrics: tutorials/supermetrics.md - - Migrating onto viadot 2.0: howtos/howto_migrate_sources_tasks_and_flows.md + - Developer Guide: + - Introduction: developer_guide/index.md + - Creating a source: developer_guide/creating_a_source.md + - Creating a Prefect flow: developer_guide/creating_a_prefect_flow.md + - Contributing: developer_guide/contributing_to_viadot.md + + - Advanced Usage: + - Introduction: advanced_usage/index.md + - Containerized development environment: advanced_usage/containerized_env.md - References: - - SQL Sources: references/sql_sources.md - - API Sources: references/api_sources.md + - Sources: + - SQL Sources: references/sources/sql_sources.md + - API Sources: references/sources/api_sources.md + - Orchestration: + - Prefect: + - Tasks: references/orchestration/prefect/tasks.md + - Flows: references/orchestration/prefect/flows.md theme: name: "material" diff --git a/pyproject.toml b/pyproject.toml index cb9b2a1a4..7779c2150 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,55 +1,70 @@ [project] name = "viadot2" -version = "2.0a26" +version = "2.1.0" description = "A simple data ingestion library to guide data flows from some places to other places." authors = [ { name = "acivitillo", email = "acivitillo@dyvenia.com" }, { name = "trymzet", email = "mzawadzki@dyvenia.com" }, ] dependencies = [ - "azure-core==1.25.0", - "azure-storage-blob==12.13.1", - "awswrangler==2.19.0", - "s3fs==2022.11.0", - "boto3==1.24.59", - "pandas==1.4.4", - "pyarrow==10.0.1", - "pyodbc>=4.0.34, <4.1.0", - "openpyxl==3.0.10", - "jupyterlab==3.2.4", - "matplotlib>=3.8.3", - "adlfs==2022.9.1", - "Shapely==1.8.0", - "imagehash==4.2.1", - "visions==0.7.5", - "sharepy>=2.0.0, <2.1.0", - "simple_salesforce==1.11.5", - "sql-metadata>=2.3.0", - "duckdb==0.5.1", - "sendgrid==6.9.7", - "pandas-gbq==0.19.1", - "pyyaml>=6.0.1", - "pydantic==1.10.11", - "aiolimiter==1.0.0", - "trino==0.326.*", + "pyodbc>=5.1.0", + "visions>=0.6.4", + "sharepy>=2.0.0", + "sql-metadata>=2.11.0", + "sendgrid>=6.11.0", + "pydantic>=1.10.9,<1.11.0", + "aiolimiter>=1.1.0", + "trino==0.328.*", + "imagehash>=4.2.1", + "shapely>=1.8.0", "sqlalchemy==2.0.*", - "minio>=7.0, <8.0", - "azure-identity>=1.15.0", + "lumacli>=0.1.2, <0.2.0", + "pygit2>=1.13.3, <1.15.0", + "openpyxl>=3.0.0", + "prefect>=2.19.7, <3", + "prefect-sqlalchemy>=0.4.3", + "pandas>=1.2.0", + "duckdb==1.0.0", + "requests>=2.32.3", + "prefect-github>=0.2.7", + "o365>=2.0.36", + # v10.1 is the maximum version compatible with awswrangler 2.x. + "pyarrow>=10.0, <10.1.0", + # numpy>=2.0 is not compatible with the old pyarrow v10.x. 
+ "numpy>=1.23.4, <2.0", ] requires-python = ">=3.10" readme = "README.md" [project.optional-dependencies] databricks = ["databricks-connect==11.3.*"] +azure = [ + "azure-core==1.30.1", + "azure-storage-blob==12.20.0", + "adlfs==2024.4.1", + "azure-identity>=1.16.0", + "dbt-sqlserver>=1.3, <1.8", + "prefect-azure @ git+https://github.com/Trymzet/prefect-azure@add_keyvault_auth#egg=prefect-azure", + "prefect_github", +] +aws = [ + "s3fs==2024.6.0", + "boto3==1.34.106", + "dbt-redshift>=1.3, <1.8", + "minio>=7.0, <8.0", + "awswrangler>=2.20.1, <3.0", + "prefect-aws>=0.4.19", +] +sap = ["pyrfc<3.0"] [tool.rye] +managed = true dev-dependencies = [ - "black==22.6.0", - "pytest==8.1.1", - "pytest-cov==3.0.0", "Faker==13.12.1", "python-dotenv>=1.0.1", - "coverage>=7.4.4", + "coverage[toml]>=7.6.1", + "aiohttp>=3.9.5", + "loguru>=0.7.2", "mkdocs-material[imaging]>=9.4.3", "mkdocs-mermaid2-plugin>=1.1.0", "mkdocs-include-dir-to-nav>=1.2.0", @@ -62,13 +77,131 @@ dev-dependencies = [ "mkdocs-include-markdown-plugin>=5.1.0", "mkdocs-git-revision-date-plugin>=0.3.2", "mkdocs-glightbox>=0.4.0", + "pytest>=8.2.2", + "ruff>=0.5.2", + "pytest-asyncio>=0.23.8", + "moto>=5.0.13", ] [tool.pytest.ini_options] +asyncio_mode = "auto" log_format = "%(asctime)s %(levelname)s %(message)s" log_date_format = "%Y-%m-%d %H:%M:%S" log_cli = true log_level = "WARNING" +addopts = "--ignore=dbt_packages --import-mode=importlib" [tool.coverage.run] -omit = ['tests/*'] +omit = ['tests/integration/*'] + +[tool.coverage.report] +show_missing = true +fail_under = 30 + +[tool.ruff.lint] +# Last rule review: ruff 0.1.5 +extend-select = [ + "I", # isort + "D", # pydocstyle + "W", # pycodestyle (warnings) + "B", # flake8-bugbear + "S", # flake8-bandit + "ANN", # flake8-annotations + "A", # flake8-builtins + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "T20", # flake8-print + "PT", # flake8-pytest-style + "RET", # flake8-return + "SIM", # flake8-simplify + "ARG", # flake8-unused-arguments + "PTH", # flake8-use-pathlib + "N", # pep8-naming + "UP", # pyupgrade + "C901", # mccabe + "FURB", # refurb + "TRY", # tryceratops + # "PD", # https://docs.astral.sh/ruff/rules/#pandas-vet-pd + "PL", # pylint + "RUF", # Ruff-specific rules +] + +# Ignore rules known to be conflicting between the ruff linter and formatter. +# See https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules +ignore = [ + "W191", + "D206", + "D300", + "D101", + "D417", + "ANN101", + "ANN102", # Type annotation for `cls`. + "ANN002", + "ANN003", + "ANN202", # Return type annotation of private functions. + "ANN204", +] + +[tool.ruff.lint.extend-per-file-ignores] +# S101: use of assert error +# ANN201, ANN202, ANN001: missing typing +# D103: missing function docstrings +# D100: missing module docstring +# N802, N803: caps lock argument and function names (disabled to allow naming fixtures in all caps) +"tests/**" = [ + "S101", + "ANN201", + "ANN202", + "ANN001", + "D103", + "D100", + "N802", + "N803", + "B905", + "D102", + "PLR2004", +] + + +[tool.ruff.lint.mccabe] +max-complexity = 10 + +[tool.ruff.lint.isort] +force-sort-within-sections = true +lines-after-imports = 2 + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.pycodestyle] +max-doc-length = 88 + +[tool.ruff.lint.pylint] +max-args = 10 + +# For checking whether the docstrings match function signature. +# https://peps.python.org/pep-0727/ should basically solve this in Python 3.13. 
+[tool.pydoclint] +style = "google" +arg-type-hints-in-docstring = false +check-return-types = false +check-yield-types = false +allow-init-docstring = true + +[tool.mypy] +strict = false +warn_unreachable = true +pretty = true +show_column_numbers = true +show_error_context = true +exclude = "tests" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.build.targets.wheel] +packages = ["src/viadot"] diff --git a/requirements-dev.lock b/requirements-dev.lock index 1014d8200..4b7dd6109 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,305 +6,247 @@ # features: [] # all-features: false # with-sources: false +# generate-hashes: false +# universal: false -e file:. -adlfs==2022.9.1 - # via viadot2 -aenum==3.1.15 - # via gremlinpython -aiobotocore==2.4.2 - # via s3fs -aiohttp==3.9.3 - # via adlfs - # via aiobotocore - # via gremlinpython - # via s3fs -aioitertools==0.11.0 - # via aiobotocore -aiolimiter==1.0.0 +aiohappyeyeballs==2.4.0 + # via aiohttp +aiohttp==3.10.5 +aiolimiter==1.1.0 # via viadot2 aiosignal==1.3.1 # via aiohttp -anyio==3.7.1 +aiosqlite==0.20.0 + # via prefect +alembic==1.13.2 + # via prefect +anyio==4.4.0 # via httpx - # via jupyter-server -argon2-cffi==23.1.0 - # via jupyter-server - # via minio - # via nbclassic -argon2-cffi-bindings==21.2.0 - # via argon2-cffi -asn1crypto==1.5.1 - # via scramp + # via prefect +apprise==1.8.1 + # via prefect +asgi-lifespan==2.1.0 + # via prefect asttokens==2.4.1 # via stack-data async-timeout==4.0.3 # via aiohttp -attrs==23.2.0 + # via asyncpg +asyncpg==0.29.0 + # via prefect +attrs==24.2.0 # via aiohttp # via jsonschema # via referencing # via visions - # via zeep -authlib==1.3.0 - # via simple-salesforce -awswrangler==2.19.0 - # via viadot2 -azure-core==1.25.0 - # via adlfs - # via azure-identity - # via azure-storage-blob - # via msrest - # via viadot2 -azure-datalake-store==0.0.53 - # via adlfs -azure-identity==1.15.0 - # via adlfs - # via viadot2 -azure-storage-blob==12.13.1 - # via adlfs - # via viadot2 -babel==2.14.0 - # via jupyterlab-server +babel==2.16.0 # via mkdocs-material -backoff==2.2.1 - # via awswrangler beautifulsoup4==4.12.3 # via mkdocs-mermaid2-plugin # via nbconvert - # via redshift-connector -black==22.6.0 + # via o365 bleach==6.1.0 # via nbconvert -boto3==1.24.59 - # via awswrangler - # via redshift-connector - # via viadot2 -botocore==1.27.59 - # via aiobotocore - # via awswrangler +boto3==1.35.1 + # via moto +botocore==1.35.1 # via boto3 - # via redshift-connector + # via moto # via s3transfer -bracex==2.4 +bracex==2.5 # via wcmatch -cachetools==5.3.3 +cachetools==5.5.0 # via google-auth -cairocffi==1.7.0 + # via prefect +cairocffi==1.7.1 # via cairosvg cairosvg==2.7.1 # via mkdocs-material -certifi==2024.2.2 +certifi==2024.7.4 + # via apprise # via httpcore # via httpx - # via minio - # via msrest - # via opensearch-py + # via kubernetes # via requests -cffi==1.16.0 - # via argon2-cffi-bindings - # via azure-datalake-store +cffi==1.17.0 # via cairocffi # via cryptography + # via pygit2 charset-normalizer==3.3.2 # via requests click==8.1.7 - # via black + # via apprise # via mkdocs # via mkdocstrings # via neoteroi-mkdocs + # via prefect + # via typer + # via uvicorn +cloudpickle==3.0.0 + # via prefect colorama==0.4.6 # via griffe # via mkdocs-material comm==0.2.2 # via ipykernel -contourpy==1.2.0 - # via matplotlib -coverage==7.4.4 - # via pytest-cov -cryptography==42.0.5 - # via authlib - # via 
azure-identity - # via azure-storage-blob - # via msal - # via pyjwt +coolname==2.2.0 + # via prefect +coverage==7.6.1 +croniter==2.0.7 + # via prefect +cryptography==43.0.0 + # via moto + # via prefect cssselect2==0.7.0 # via cairosvg -cycler==0.12.1 - # via matplotlib -db-dtypes==1.2.0 - # via pandas-gbq -debugpy==1.8.1 +dateparser==1.2.0 + # via prefect +debugpy==1.8.5 # via ipykernel decorator==5.1.1 # via ipython defusedxml==0.7.1 # via cairosvg # via nbconvert -duckdb==0.5.1 +dnspython==2.6.1 + # via email-validator +docker==7.1.0 + # via prefect +duckdb==1.0.0 # via viadot2 editorconfig==0.12.4 # via jsbeautifier +email-validator==2.2.0 + # via pydantic essentials==1.1.5 # via essentials-openapi essentials-openapi==1.0.9 # via neoteroi-mkdocs et-xmlfile==1.1.0 # via openpyxl -exceptiongroup==1.2.0 +exceptiongroup==1.2.2 # via anyio # via ipython + # via prefect # via pytest executing==2.0.1 # via stack-data faker==13.12.1 -fastjsonschema==2.19.1 +fastjsonschema==2.20.0 # via nbformat -fonttools==4.50.0 - # via matplotlib frozenlist==1.4.1 # via aiohttp # via aiosignal -fsspec==2022.11.0 - # via adlfs - # via s3fs +fsspec==2024.6.1 + # via prefect ghp-import==2.1.0 # via mkdocs gitdb==4.0.11 # via gitpython gitpython==3.1.43 # via mkdocs-git-revision-date-plugin -google-api-core==2.18.0 - # via google-cloud-bigquery - # via google-cloud-bigquery-storage - # via google-cloud-core - # via pandas-gbq -google-auth==2.29.0 - # via google-api-core - # via google-auth-oauthlib - # via google-cloud-bigquery - # via google-cloud-core - # via pandas-gbq - # via pydata-google-auth -google-auth-oauthlib==1.2.0 - # via pandas-gbq - # via pydata-google-auth -google-cloud-bigquery==3.19.0 - # via pandas-gbq -google-cloud-bigquery-storage==2.24.0 - # via pandas-gbq -google-cloud-core==2.4.1 - # via google-cloud-bigquery -google-crc32c==1.5.0 - # via google-resumable-media -google-resumable-media==2.7.0 - # via google-cloud-bigquery -googleapis-common-protos==1.63.0 - # via google-api-core - # via grpcio-status +google-auth==2.34.0 + # via kubernetes +graphql-core==3.2.3 + # via sgqlc +graphviz==0.20.3 + # via prefect greenlet==3.0.3 # via sqlalchemy -gremlinpython==3.7.1 - # via awswrangler -griffe==0.45.3 +griffe==0.47.0 # via mkdocstrings-python -grpcio==1.62.1 - # via google-api-core - # via grpcio-status -grpcio-status==1.62.1 - # via google-api-core + # via prefect h11==0.14.0 # via httpcore + # via uvicorn +h2==4.1.0 + # via httpx +hpack==4.0.0 + # via h2 httpcore==1.0.5 # via httpx + # via prefect httpx==0.27.0 # via neoteroi-mkdocs -idna==3.6 + # via prefect +humanize==4.10.0 + # via jinja2-humanize-extension + # via prefect +hyperframe==6.0.1 + # via h2 +idna==3.7 # via anyio + # via email-validator # via httpx # via requests # via yarl -imagehash==4.2.1 +imagehash==4.3.1 # via viadot2 -importlib-metadata==7.1.0 +importlib-metadata==8.3.0 # via mike -importlib-resources==6.4.0 +importlib-resources==6.1.3 # via mike + # via prefect iniconfig==2.0.0 # via pytest -ipykernel==6.29.3 +ipykernel==6.29.5 # via mkdocs-jupyter - # via nbclassic -ipython==8.18.1 +ipython==8.26.0 # via ipykernel - # via jupyterlab -ipython-genutils==0.2.0 - # via nbclassic -isodate==0.6.1 - # via gremlinpython - # via msrest - # via zeep +itsdangerous==2.2.0 + # via prefect jedi==0.19.1 # via ipython -jinja2==3.1.3 - # via jupyter-server - # via jupyterlab - # via jupyterlab-server +jinja2==3.1.4 + # via jinja2-humanize-extension # via mike # via mkdocs # via mkdocs-git-revision-date-plugin # via mkdocs-material # via 
mkdocstrings - # via nbclassic + # via moto # via nbconvert # via neoteroi-mkdocs + # via prefect +jinja2-humanize-extension==0.4.0 + # via prefect jmespath==1.0.1 # via boto3 # via botocore jsbeautifier==1.15.1 # via mkdocs-mermaid2-plugin -json5==0.9.24 - # via jupyterlab-server -jsonpath-ng==1.6.1 - # via awswrangler -jsonschema==4.21.1 - # via jupyterlab-server +jsonpatch==1.33 + # via prefect +jsonpointer==3.0.0 + # via jsonpatch +jsonschema==4.23.0 # via nbformat + # via prefect jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-client==8.6.1 +jupyter-client==8.6.2 # via ipykernel - # via jupyter-server - # via nbclassic # via nbclient jupyter-core==5.7.2 # via ipykernel # via jupyter-client - # via jupyter-server - # via jupyterlab - # via nbclassic # via nbclient # via nbconvert # via nbformat -jupyter-server==1.24.0 - # via jupyterlab - # via jupyterlab-server - # via nbclassic - # via notebook-shim -jupyterlab==3.2.4 - # via viadot2 jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.25.4 - # via jupyterlab -jupytext==1.16.2 +jupytext==1.16.4 # via mkdocs-jupyter -kiwisolver==1.4.5 - # via matplotlib -lxml==5.1.0 - # via redshift-connector - # via zeep -markdown==3.3.7 +kubernetes==29.0.0 + # via prefect +loguru==0.7.2 +lumacli==0.1.2 + # via viadot2 +mako==1.3.5 + # via alembic +markdown==3.7 + # via apprise # via mkdocs # via mkdocs-autorefs # via mkdocs-material @@ -317,12 +259,13 @@ markdown-it-py==3.0.0 markupsafe==2.1.5 # via essentials-openapi # via jinja2 + # via mako # via mkdocs + # via mkdocs-autorefs # via mkdocstrings # via nbconvert -matplotlib==3.8.3 - # via viadot2 -matplotlib-inline==0.1.6 + # via werkzeug +matplotlib-inline==0.1.7 # via ipykernel # via ipython mdit-py-plugins==0.4.1 @@ -332,9 +275,7 @@ mdurl==0.1.2 mergedeep==1.3.4 # via mkdocs # via mkdocs-get-deps -mike==2.1.1 -minio==7.2.5 - # via viadot2 +mike==2.1.3 mistune==3.0.2 # via nbconvert mkdocs==1.6.0 @@ -349,330 +290,284 @@ mkdocs==1.6.0 # via mkdocs-table-reader-plugin # via mkdocstrings # via neoteroi-mkdocs -mkdocs-autorefs==0.4.1 +mkdocs-autorefs==1.0.1 # via mkdocstrings mkdocs-get-deps==0.2.0 # via mkdocs mkdocs-git-revision-date-plugin==0.3.2 mkdocs-glightbox==0.4.0 mkdocs-include-dir-to-nav==1.2.0 -mkdocs-include-markdown-plugin==6.2.0 -mkdocs-jupyter==0.24.7 -mkdocs-material==9.5.26 +mkdocs-include-markdown-plugin==6.2.2 +mkdocs-jupyter==0.24.8 +mkdocs-material==9.5.32 # via mkdocs-jupyter mkdocs-material-extensions==1.3.1 # via mkdocs-material mkdocs-mermaid2-plugin==1.1.1 -mkdocs-table-reader-plugin==2.2.2 -mkdocstrings==0.25.1 +mkdocs-table-reader-plugin==3.0.1 +mkdocstrings==0.25.2 # via mkdocstrings-python -mkdocstrings-python==1.10.3 +mkdocstrings-python==1.10.5 # via mkdocstrings -msal==1.28.0 - # via azure-datalake-store - # via azure-identity - # via msal-extensions -msal-extensions==0.3.1 - # via azure-identity -msrest==0.7.1 - # via azure-storage-blob +moto==5.0.13 multidict==6.0.5 # via aiohttp # via yarl -multimethod==1.11.2 +multimethod==1.12 # via visions -mypy-extensions==1.0.0 - # via black -nbclassic==0.5.6 - # via jupyterlab nbclient==0.10.0 # via nbconvert -nbconvert==7.16.3 - # via jupyter-server +nbconvert==7.16.4 # via mkdocs-jupyter - # via nbclassic -nbformat==5.10.3 - # via jupyter-server +nbformat==5.10.4 # via jupytext - # via nbclassic # via nbclient # via nbconvert -neoteroi-mkdocs==1.0.5 +neoteroi-mkdocs==1.1.0 nest-asyncio==1.6.0 - # via gremlinpython # via ipykernel - # via nbclassic -networkx==3.2.1 +networkx==3.3 # via visions 
-notebook-shim==0.2.4 - # via nbclassic -numpy==1.23.4 - # via awswrangler - # via contourpy - # via db-dtypes - # via duckdb +numpy==1.26.4 # via imagehash - # via matplotlib # via pandas - # via pandas-gbq # via pyarrow # via pywavelets # via scipy + # via shapely + # via viadot2 # via visions +o365==2.0.36 + # via viadot2 oauthlib==3.2.2 + # via kubernetes # via requests-oauthlib -openpyxl==3.0.10 - # via awswrangler +openpyxl==3.1.5 # via viadot2 -opensearch-py==2.4.2 - # via awswrangler -packaging==24.0 - # via db-dtypes - # via google-cloud-bigquery +orjson==3.10.7 + # via prefect +packaging==24.1 # via ipykernel - # via jupyter-server - # via jupyterlab - # via jupyterlab-server # via jupytext - # via matplotlib # via mkdocs # via nbconvert + # via prefect # via pytest - # via redshift-connector paginate==0.5.6 # via mkdocs-material -pandas==1.4.4 - # via awswrangler - # via db-dtypes +pandas==2.2.2 # via mkdocs-table-reader-plugin - # via pandas-gbq # via viadot2 # via visions -pandas-gbq==0.19.1 - # via viadot2 pandocfilters==1.5.1 # via nbconvert -parso==0.8.3 +parso==0.8.4 # via jedi pathspec==0.12.1 - # via black # via mkdocs + # via prefect +pendulum==2.1.2 + # via prefect pexpect==4.9.0 # via ipython -pg8000==1.30.5 - # via awswrangler -pillow==10.2.0 +pillow==10.4.0 # via cairosvg # via imagehash - # via matplotlib # via mkdocs-material -platformdirs==4.2.0 - # via black +platformdirs==4.2.2 # via jupyter-core # via mkdocs-get-deps # via mkdocstrings - # via zeep -pluggy==1.4.0 +pluggy==1.5.0 # via pytest -ply==3.11 - # via jsonpath-ng -portalocker==2.8.2 - # via msal-extensions -progressbar2==4.4.2 - # via awswrangler -prometheus-client==0.20.0 - # via jupyter-server - # via nbclassic -prompt-toolkit==3.0.43 +prefect==2.20.2 + # via prefect-github + # via prefect-sqlalchemy + # via viadot2 +prefect-github==0.2.7 + # via viadot2 +prefect-sqlalchemy==0.4.4 + # via viadot2 +prompt-toolkit==3.0.47 # via ipython -proto-plus==1.23.0 - # via google-api-core - # via google-cloud-bigquery-storage -protobuf==4.25.3 - # via google-api-core - # via google-cloud-bigquery-storage - # via googleapis-common-protos - # via grpcio-status - # via proto-plus -psutil==5.9.8 +psutil==6.0.0 # via ipykernel +psycopg2-binary==2.9.9 + # via lumacli ptyprocess==0.7.0 # via pexpect - # via terminado -pure-eval==0.2.2 +pure-eval==0.2.3 # via stack-data pyarrow==10.0.1 - # via awswrangler - # via db-dtypes - # via pandas-gbq # via viadot2 -pyasn1==0.5.1 +pyasn1==0.6.0 # via pyasn1-modules # via rsa -pyasn1-modules==0.3.0 +pyasn1-modules==0.4.0 # via google-auth -pycparser==2.21 +pycparser==2.22 # via cffi -pycryptodome==3.20.0 - # via minio -pydantic==1.10.11 +pydantic==1.10.17 + # via lumacli + # via prefect + # via viadot2 +pydantic-core==2.23.0 + # via prefect +pygit2==1.14.1 # via viadot2 -pydata-google-auth==1.8.2 - # via pandas-gbq -pygments==2.17.2 +pygments==2.18.0 # via ipython # via mkdocs-jupyter # via mkdocs-material # via nbconvert # via rich -pyjwt==2.8.0 - # via msal -pymdown-extensions==10.4 +pymdown-extensions==10.9 # via mkdocs-material # via mkdocs-mermaid2-plugin # via mkdocstrings -pymysql==1.1.0 - # via awswrangler -pyodbc==4.0.39 +pyodbc==5.1.0 # via viadot2 pyparsing==3.1.2 - # via matplotlib # via mike -pytest==8.1.1 - # via pytest-cov -pytest-cov==3.0.0 +pytest==8.3.2 + # via pytest-asyncio +pytest-asyncio==0.23.8 python-dateutil==2.9.0.post0 # via botocore + # via croniter + # via dateparser # via faker # via ghp-import - # via google-cloud-bigquery # via jupyter-client - # 
via matplotlib - # via opensearch-py + # via kubernetes + # via moto + # via o365 # via pandas - # via pg8000 + # via pendulum + # via prefect # via trino python-dotenv==1.0.1 python-http-client==3.3.7 # via sendgrid -python-utils==3.8.2 - # via progressbar2 +python-multipart==0.0.9 + # via prefect +python-slugify==8.0.4 + # via prefect pytz==2024.1 + # via croniter + # via dateparser # via pandas - # via redshift-connector + # via prefect # via trino - # via zeep -pywavelets==1.5.0 +pytzdata==2020.1 + # via pendulum +pywavelets==1.7.0 # via imagehash -pyyaml==6.0.1 +pyyaml==6.0.2 + # via apprise # via essentials-openapi # via jupytext + # via kubernetes + # via lumacli # via mike # via mkdocs # via mkdocs-get-deps # via mkdocs-table-reader-plugin + # via prefect # via pymdown-extensions # via pyyaml-env-tag - # via viadot2 + # via responses pyyaml-env-tag==0.1 # via mike # via mkdocs -pyzmq==25.1.2 +pyzmq==26.1.1 # via ipykernel # via jupyter-client - # via jupyter-server - # via nbclassic -redshift-connector==2.0.918 - # via awswrangler -referencing==0.34.0 +readchar==4.2.0 + # via prefect +referencing==0.35.1 # via jsonschema # via jsonschema-specifications -regex==2024.5.15 +regex==2024.7.24 + # via dateparser # via mkdocs-material -requests==2.31.0 - # via azure-core - # via azure-datalake-store - # via google-api-core - # via google-cloud-bigquery - # via jupyterlab-server +requests==2.32.3 + # via apprise + # via docker + # via kubernetes + # via lumacli # via mkdocs-material # via mkdocs-mermaid2-plugin - # via msal - # via msrest - # via opensearch-py - # via redshift-connector - # via requests-aws4auth - # via requests-file + # via moto + # via o365 # via requests-oauthlib - # via requests-toolbelt + # via responses # via sharepy - # via simple-salesforce # via trino - # via zeep -requests-aws4auth==1.2.3 - # via awswrangler -requests-file==2.0.0 - # via zeep -requests-oauthlib==1.4.0 - # via google-auth-oauthlib - # via msrest -requests-toolbelt==1.0.0 - # via zeep + # via viadot2 +requests-oauthlib==2.0.0 + # via apprise + # via kubernetes + # via o365 +responses==0.25.3 + # via moto +rfc3339-validator==0.1.4 + # via prefect rich==13.7.1 + # via lumacli # via neoteroi-mkdocs -rpds-py==0.18.0 + # via prefect + # via typer +rpds-py==0.20.0 # via jsonschema # via referencing rsa==4.9 # via google-auth -s3fs==2022.11.0 - # via viadot2 -s3transfer==0.6.2 +ruamel-yaml==0.18.6 + # via prefect +ruamel-yaml-clib==0.2.8 + # via ruamel-yaml +ruff==0.6.1 +s3transfer==0.10.2 # via boto3 -scipy==1.12.0 +scipy==1.14.0 # via imagehash -scramp==1.4.4 - # via pg8000 - # via redshift-connector -send2trash==1.8.2 - # via jupyter-server - # via nbclassic -sendgrid==6.9.7 +sendgrid==6.11.0 # via viadot2 -setuptools==69.2.0 +setuptools==73.0.0 # via mkdocs-mermaid2-plugin - # via pandas-gbq - # via pydata-google-auth - # via redshift-connector -shapely==1.8.0 +sgqlc==16.3 + # via prefect-github +shapely==2.0.6 # via viadot2 sharepy==2.0.0 # via viadot2 -simple-salesforce==1.11.5 - # via viadot2 +shellingham==1.5.4 + # via typer six==1.16.0 # via asttokens - # via azure-core # via bleach - # via gremlinpython - # via imagehash - # via isodate # via jsbeautifier - # via opensearch-py + # via kubernetes # via python-dateutil - # via requests-aws4auth + # via rfc3339-validator smmap==5.0.1 # via gitdb sniffio==1.3.1 # via anyio + # via asgi-lifespan # via httpx -soupsieve==2.5 + # via prefect +soupsieve==2.6 # via beautifulsoup4 sql-metadata==2.12.0 # via viadot2 -sqlalchemy==2.0.28 +sqlalchemy==2.0.32 + 
# via alembic + # via prefect + # via prefect-sqlalchemy # via viadot2 -sqlparse==0.4.4 +sqlparse==0.5.1 # via sql-metadata stack-data==0.6.3 # via ipython @@ -680,61 +575,71 @@ starkbank-ecdsa==2.2.0 # via sendgrid tabulate==0.9.0 # via mkdocs-table-reader-plugin -tangled-up-in-unicode==0.2.0 - # via visions -terminado==0.18.1 - # via jupyter-server - # via nbclassic -tinycss2==1.2.1 +text-unidecode==1.3 + # via python-slugify +tinycss2==1.3.0 # via cairosvg # via cssselect2 # via nbconvert +toml==0.10.2 + # via prefect tomli==2.0.1 - # via black # via coverage # via jupytext # via pytest -tornado==6.4 +tornado==6.4.1 # via ipykernel # via jupyter-client - # via jupyter-server - # via jupyterlab - # via nbclassic - # via terminado -traitlets==5.14.2 +traitlets==5.14.3 # via comm # via ipykernel # via ipython # via jupyter-client # via jupyter-core - # via jupyter-server # via matplotlib-inline - # via nbclassic # via nbclient # via nbconvert # via nbformat -trino==0.326.0 +trino==0.328.0 # via viadot2 -typing-extensions==4.10.0 - # via azure-core - # via minio +typer==0.12.4 + # via lumacli + # via prefect +typing-extensions==4.12.2 + # via aiosqlite + # via alembic + # via anyio + # via ipython + # via prefect # via pydantic - # via python-utils + # via pydantic-core # via sqlalchemy + # via typer + # via uvicorn +tzdata==2024.1 + # via o365 + # via pandas tzlocal==5.2 + # via dateparser + # via o365 # via trino -urllib3==1.26.18 +ujson==5.10.0 + # via prefect +urllib3==2.2.2 # via botocore - # via minio - # via opensearch-py + # via docker + # via kubernetes # via requests + # via responses +uvicorn==0.30.6 + # via prefect verspec==0.1.0 # via mike -visions==0.7.5 +visions==0.7.6 # via viadot2 -watchdog==4.0.0 +watchdog==4.0.2 # via mkdocs -wcmatch==8.5.2 +wcmatch==9.0 # via mkdocs-include-markdown-plugin wcwidth==0.2.13 # via prompt-toolkit @@ -742,13 +647,15 @@ webencodings==0.5.1 # via bleach # via cssselect2 # via tinycss2 -websocket-client==1.7.0 - # via jupyter-server -wrapt==1.16.0 - # via aiobotocore +websocket-client==1.8.0 + # via kubernetes +websockets==12.0 + # via prefect +werkzeug==3.0.3 + # via moto +xmltodict==0.13.0 + # via moto yarl==1.9.4 # via aiohttp -zeep==4.2.1 - # via simple-salesforce -zipp==3.18.1 +zipp==3.20.0 # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index 42738a429..50485ed67 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,556 +6,346 @@ # features: [] # all-features: false # with-sources: false +# generate-hashes: false +# universal: false -e file:. 
-adlfs==2022.9.1 +aiolimiter==1.1.0 # via viadot2 -aenum==3.1.15 - # via gremlinpython -aiobotocore==2.4.2 - # via s3fs -aiohttp==3.9.3 - # via adlfs - # via aiobotocore - # via gremlinpython - # via s3fs -aioitertools==0.11.0 - # via aiobotocore -aiolimiter==1.0.0 - # via viadot2 -aiosignal==1.3.1 - # via aiohttp -anyio==3.7.1 - # via jupyter-server -argon2-cffi==23.1.0 - # via jupyter-server - # via minio - # via nbclassic -argon2-cffi-bindings==21.2.0 - # via argon2-cffi -asn1crypto==1.5.1 - # via scramp -asttokens==2.4.1 - # via stack-data +aiosqlite==0.20.0 + # via prefect +alembic==1.13.2 + # via prefect +anyio==4.4.0 + # via httpx + # via prefect +apprise==1.8.1 + # via prefect +asgi-lifespan==2.1.0 + # via prefect async-timeout==4.0.3 - # via aiohttp -attrs==23.2.0 - # via aiohttp + # via asyncpg +asyncpg==0.29.0 + # via prefect +attrs==24.2.0 # via jsonschema # via referencing # via visions - # via zeep -authlib==1.3.0 - # via simple-salesforce -awswrangler==2.19.0 - # via viadot2 -azure-core==1.25.0 - # via adlfs - # via azure-identity - # via azure-storage-blob - # via msrest - # via viadot2 -azure-datalake-store==0.0.53 - # via adlfs -azure-identity==1.15.0 - # via adlfs - # via viadot2 -azure-storage-blob==12.13.1 - # via adlfs - # via viadot2 -babel==2.14.0 - # via jupyterlab-server -backoff==2.2.1 - # via awswrangler beautifulsoup4==4.12.3 - # via nbconvert - # via redshift-connector -bleach==6.1.0 - # via nbconvert -boto3==1.24.59 - # via awswrangler - # via redshift-connector - # via viadot2 -botocore==1.27.59 - # via aiobotocore - # via awswrangler - # via boto3 - # via redshift-connector - # via s3transfer -cachetools==5.3.3 + # via o365 +cachetools==5.5.0 # via google-auth -certifi==2024.2.2 - # via minio - # via msrest - # via opensearch-py + # via prefect +certifi==2024.7.4 + # via apprise + # via httpcore + # via httpx + # via kubernetes # via requests -cffi==1.16.0 - # via argon2-cffi-bindings - # via azure-datalake-store +cffi==1.17.0 # via cryptography + # via pygit2 charset-normalizer==3.3.2 # via requests -comm==0.2.2 - # via ipykernel -contourpy==1.2.0 - # via matplotlib -cryptography==42.0.5 - # via authlib - # via azure-identity - # via azure-storage-blob - # via msal - # via pyjwt -cycler==0.12.1 - # via matplotlib -db-dtypes==1.2.0 - # via pandas-gbq -debugpy==1.8.1 - # via ipykernel -decorator==5.1.1 - # via ipython -defusedxml==0.7.1 - # via nbconvert -duckdb==0.5.1 +click==8.1.7 + # via apprise + # via prefect + # via typer + # via uvicorn +cloudpickle==3.0.0 + # via prefect +colorama==0.4.6 + # via griffe +coolname==2.2.0 + # via prefect +croniter==2.0.7 + # via prefect +cryptography==43.0.0 + # via prefect +dateparser==1.2.0 + # via prefect +dnspython==2.6.1 + # via email-validator +docker==7.1.0 + # via prefect +duckdb==1.0.0 # via viadot2 +email-validator==2.2.0 + # via pydantic et-xmlfile==1.1.0 # via openpyxl -exceptiongroup==1.2.0 +exceptiongroup==1.2.2 # via anyio - # via ipython -executing==2.0.1 - # via stack-data -fastjsonschema==2.19.1 - # via nbformat -fonttools==4.50.0 - # via matplotlib -frozenlist==1.4.1 - # via aiohttp - # via aiosignal -fsspec==2022.11.0 - # via adlfs - # via s3fs -google-api-core==2.18.0 - # via google-cloud-bigquery - # via google-cloud-bigquery-storage - # via google-cloud-core - # via pandas-gbq -google-auth==2.29.0 - # via google-api-core - # via google-auth-oauthlib - # via google-cloud-bigquery - # via google-cloud-core - # via pandas-gbq - # via pydata-google-auth -google-auth-oauthlib==1.2.0 - # via pandas-gbq 
- # via pydata-google-auth -google-cloud-bigquery==3.19.0 - # via pandas-gbq -google-cloud-bigquery-storage==2.24.0 - # via pandas-gbq -google-cloud-core==2.4.1 - # via google-cloud-bigquery -google-crc32c==1.5.0 - # via google-resumable-media -google-resumable-media==2.7.0 - # via google-cloud-bigquery -googleapis-common-protos==1.63.0 - # via google-api-core - # via grpcio-status + # via prefect +fsspec==2024.6.1 + # via prefect +google-auth==2.34.0 + # via kubernetes +graphql-core==3.2.3 + # via sgqlc +graphviz==0.20.3 + # via prefect greenlet==3.0.3 # via sqlalchemy -gremlinpython==3.7.1 - # via awswrangler -grpcio==1.62.1 - # via google-api-core - # via grpcio-status -grpcio-status==1.62.1 - # via google-api-core -idna==3.6 +griffe==0.47.0 + # via prefect +h11==0.14.0 + # via httpcore + # via uvicorn +h2==4.1.0 + # via httpx +hpack==4.0.0 + # via h2 +httpcore==1.0.5 + # via httpx + # via prefect +httpx==0.27.0 + # via prefect +humanize==4.10.0 + # via jinja2-humanize-extension + # via prefect +hyperframe==6.0.1 + # via h2 +idna==3.7 # via anyio + # via email-validator + # via httpx # via requests - # via yarl -imagehash==4.2.1 +imagehash==4.3.1 # via viadot2 -ipykernel==6.29.3 - # via nbclassic -ipython==8.18.1 - # via ipykernel - # via jupyterlab -ipython-genutils==0.2.0 - # via nbclassic -isodate==0.6.1 - # via gremlinpython - # via msrest - # via zeep -jedi==0.19.1 - # via ipython -jinja2==3.1.3 - # via jupyter-server - # via jupyterlab - # via jupyterlab-server - # via nbclassic - # via nbconvert -jmespath==1.0.1 - # via boto3 - # via botocore -json5==0.9.24 - # via jupyterlab-server -jsonpath-ng==1.6.1 - # via awswrangler -jsonschema==4.21.1 - # via jupyterlab-server - # via nbformat +importlib-resources==6.1.3 + # via prefect +itsdangerous==2.2.0 + # via prefect +jinja2==3.1.4 + # via jinja2-humanize-extension + # via prefect +jinja2-humanize-extension==0.4.0 + # via prefect +jsonpatch==1.33 + # via prefect +jsonpointer==3.0.0 + # via jsonpatch +jsonschema==4.23.0 + # via prefect jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-client==8.6.1 - # via ipykernel - # via jupyter-server - # via nbclassic - # via nbclient -jupyter-core==5.7.2 - # via ipykernel - # via jupyter-client - # via jupyter-server - # via jupyterlab - # via nbclassic - # via nbclient - # via nbconvert - # via nbformat -jupyter-server==1.24.0 - # via jupyterlab - # via jupyterlab-server - # via nbclassic - # via notebook-shim -jupyterlab==3.2.4 +kubernetes==29.0.0 + # via prefect +lumacli==0.1.2 # via viadot2 -jupyterlab-pygments==0.3.0 - # via nbconvert -jupyterlab-server==2.25.4 - # via jupyterlab -kiwisolver==1.4.5 - # via matplotlib -lxml==5.1.0 - # via redshift-connector - # via zeep +mako==1.3.5 + # via alembic +markdown==3.7 + # via apprise +markdown-it-py==3.0.0 + # via rich markupsafe==2.1.5 # via jinja2 - # via nbconvert -matplotlib==3.8.3 - # via viadot2 -matplotlib-inline==0.1.6 - # via ipykernel - # via ipython -minio==7.2.5 - # via viadot2 -mistune==3.0.2 - # via nbconvert -msal==1.28.0 - # via azure-datalake-store - # via azure-identity - # via msal-extensions -msal-extensions==0.3.1 - # via azure-identity -msrest==0.7.1 - # via azure-storage-blob -multidict==6.0.5 - # via aiohttp - # via yarl -multimethod==1.11.2 + # via mako +mdurl==0.1.2 + # via markdown-it-py +multimethod==1.12 # via visions -nbclassic==0.5.6 - # via jupyterlab -nbclient==0.10.0 - # via nbconvert -nbconvert==7.16.3 - # via jupyter-server - # via nbclassic -nbformat==5.10.3 - # via jupyter-server - # via 
nbclassic - # via nbclient - # via nbconvert -nest-asyncio==1.6.0 - # via gremlinpython - # via ipykernel - # via nbclassic -networkx==3.2.1 +networkx==3.3 # via visions -notebook-shim==0.2.4 - # via nbclassic -numpy==1.23.4 - # via awswrangler - # via contourpy - # via db-dtypes - # via duckdb +numpy==1.26.4 # via imagehash - # via matplotlib # via pandas - # via pandas-gbq # via pyarrow # via pywavelets # via scipy + # via shapely + # via viadot2 # via visions +o365==2.0.36 + # via viadot2 oauthlib==3.2.2 + # via kubernetes # via requests-oauthlib -openpyxl==3.0.10 - # via awswrangler +openpyxl==3.1.5 # via viadot2 -opensearch-py==2.4.2 - # via awswrangler -packaging==24.0 - # via db-dtypes - # via google-cloud-bigquery - # via ipykernel - # via jupyter-server - # via jupyterlab - # via jupyterlab-server - # via matplotlib - # via nbconvert - # via redshift-connector -pandas==1.4.4 - # via awswrangler - # via db-dtypes - # via pandas-gbq +orjson==3.10.7 + # via prefect +packaging==24.1 + # via prefect +pandas==2.2.2 # via viadot2 # via visions -pandas-gbq==0.19.1 - # via viadot2 -pandocfilters==1.5.1 - # via nbconvert -parso==0.8.3 - # via jedi -pexpect==4.9.0 - # via ipython -pg8000==1.30.5 - # via awswrangler -pillow==10.2.0 +pathspec==0.12.1 + # via prefect +pendulum==2.1.2 + # via prefect +pillow==10.4.0 # via imagehash - # via matplotlib -platformdirs==4.2.0 - # via jupyter-core - # via zeep -ply==3.11 - # via jsonpath-ng -portalocker==2.8.2 - # via msal-extensions -progressbar2==4.4.2 - # via awswrangler -prometheus-client==0.20.0 - # via jupyter-server - # via nbclassic -prompt-toolkit==3.0.43 - # via ipython -proto-plus==1.23.0 - # via google-api-core - # via google-cloud-bigquery-storage -protobuf==4.25.3 - # via google-api-core - # via google-cloud-bigquery-storage - # via googleapis-common-protos - # via grpcio-status - # via proto-plus -psutil==5.9.8 - # via ipykernel -ptyprocess==0.7.0 - # via pexpect - # via terminado -pure-eval==0.2.2 - # via stack-data +prefect==2.20.2 + # via prefect-github + # via prefect-sqlalchemy + # via viadot2 +prefect-github==0.2.7 + # via viadot2 +prefect-sqlalchemy==0.4.4 + # via viadot2 +psycopg2-binary==2.9.9 + # via lumacli pyarrow==10.0.1 - # via awswrangler - # via db-dtypes - # via pandas-gbq # via viadot2 -pyasn1==0.5.1 +pyasn1==0.6.0 # via pyasn1-modules # via rsa -pyasn1-modules==0.3.0 +pyasn1-modules==0.4.0 # via google-auth -pycparser==2.21 +pycparser==2.22 # via cffi -pycryptodome==3.20.0 - # via minio -pydantic==1.10.11 +pydantic==1.10.17 + # via lumacli + # via prefect # via viadot2 -pydata-google-auth==1.8.2 - # via pandas-gbq -pygments==2.17.2 - # via ipython - # via nbconvert -pyjwt==2.8.0 - # via msal -pymysql==1.1.0 - # via awswrangler -pyodbc==4.0.39 +pydantic-core==2.23.0 + # via prefect +pygit2==1.14.1 + # via viadot2 +pygments==2.18.0 + # via rich +pyodbc==5.1.0 # via viadot2 -pyparsing==3.1.2 - # via matplotlib python-dateutil==2.9.0.post0 - # via botocore - # via google-cloud-bigquery - # via jupyter-client - # via matplotlib - # via opensearch-py + # via croniter + # via dateparser + # via kubernetes + # via o365 # via pandas - # via pg8000 + # via pendulum + # via prefect # via trino python-http-client==3.3.7 # via sendgrid -python-utils==3.8.2 - # via progressbar2 +python-multipart==0.0.9 + # via prefect +python-slugify==8.0.4 + # via prefect pytz==2024.1 + # via croniter + # via dateparser # via pandas - # via redshift-connector + # via prefect # via trino - # via zeep -pywavelets==1.5.0 +pytzdata==2020.1 + # via 
pendulum +pywavelets==1.7.0 # via imagehash -pyyaml==6.0.1 - # via viadot2 -pyzmq==25.1.2 - # via ipykernel - # via jupyter-client - # via jupyter-server - # via nbclassic -redshift-connector==2.0.918 - # via awswrangler -referencing==0.34.0 +pyyaml==6.0.2 + # via apprise + # via kubernetes + # via lumacli + # via prefect +readchar==4.2.0 + # via prefect +referencing==0.35.1 # via jsonschema # via jsonschema-specifications -requests==2.31.0 - # via azure-core - # via azure-datalake-store - # via google-api-core - # via google-cloud-bigquery - # via jupyterlab-server - # via msal - # via msrest - # via opensearch-py - # via redshift-connector - # via requests-aws4auth - # via requests-file +regex==2024.7.24 + # via dateparser +requests==2.32.3 + # via apprise + # via docker + # via kubernetes + # via lumacli + # via o365 # via requests-oauthlib - # via requests-toolbelt # via sharepy - # via simple-salesforce # via trino - # via zeep -requests-aws4auth==1.2.3 - # via awswrangler -requests-file==2.0.0 - # via zeep -requests-oauthlib==1.4.0 - # via google-auth-oauthlib - # via msrest -requests-toolbelt==1.0.0 - # via zeep -rpds-py==0.18.0 + # via viadot2 +requests-oauthlib==2.0.0 + # via apprise + # via kubernetes + # via o365 +rfc3339-validator==0.1.4 + # via prefect +rich==13.7.1 + # via lumacli + # via prefect + # via typer +rpds-py==0.20.0 # via jsonschema # via referencing rsa==4.9 # via google-auth -s3fs==2022.11.0 - # via viadot2 -s3transfer==0.6.2 - # via boto3 -scipy==1.12.0 +ruamel-yaml==0.18.6 + # via prefect +ruamel-yaml-clib==0.2.8 + # via ruamel-yaml +scipy==1.14.0 # via imagehash -scramp==1.4.4 - # via pg8000 - # via redshift-connector -send2trash==1.8.2 - # via jupyter-server - # via nbclassic -sendgrid==6.9.7 +sendgrid==6.11.0 # via viadot2 -setuptools==69.2.0 - # via pandas-gbq - # via pydata-google-auth - # via redshift-connector -shapely==1.8.0 +sgqlc==16.3 + # via prefect-github +shapely==2.0.6 # via viadot2 sharepy==2.0.0 # via viadot2 -simple-salesforce==1.11.5 - # via viadot2 +shellingham==1.5.4 + # via typer six==1.16.0 - # via asttokens - # via azure-core - # via bleach - # via gremlinpython - # via imagehash - # via isodate - # via opensearch-py + # via kubernetes # via python-dateutil - # via requests-aws4auth + # via rfc3339-validator sniffio==1.3.1 # via anyio -soupsieve==2.5 + # via asgi-lifespan + # via httpx + # via prefect +soupsieve==2.6 # via beautifulsoup4 sql-metadata==2.12.0 # via viadot2 -sqlalchemy==2.0.28 +sqlalchemy==2.0.32 + # via alembic + # via prefect + # via prefect-sqlalchemy # via viadot2 -sqlparse==0.4.4 +sqlparse==0.5.1 # via sql-metadata -stack-data==0.6.3 - # via ipython starkbank-ecdsa==2.2.0 # via sendgrid -tangled-up-in-unicode==0.2.0 - # via visions -terminado==0.18.1 - # via jupyter-server - # via nbclassic -tinycss2==1.2.1 - # via nbconvert -tornado==6.4 - # via ipykernel - # via jupyter-client - # via jupyter-server - # via jupyterlab - # via nbclassic - # via terminado -traitlets==5.14.2 - # via comm - # via ipykernel - # via ipython - # via jupyter-client - # via jupyter-core - # via jupyter-server - # via matplotlib-inline - # via nbclassic - # via nbclient - # via nbconvert - # via nbformat -trino==0.326.0 +text-unidecode==1.3 + # via python-slugify +toml==0.10.2 + # via prefect +trino==0.328.0 # via viadot2 -typing-extensions==4.10.0 - # via azure-core - # via minio +typer==0.12.4 + # via lumacli + # via prefect +typing-extensions==4.12.2 + # via aiosqlite + # via alembic + # via anyio + # via prefect # via pydantic - # via 
python-utils + # via pydantic-core # via sqlalchemy + # via typer + # via uvicorn +tzdata==2024.1 + # via o365 + # via pandas tzlocal==5.2 + # via dateparser + # via o365 # via trino -urllib3==1.26.18 - # via botocore - # via minio - # via opensearch-py +ujson==5.10.0 + # via prefect +urllib3==2.2.2 + # via docker + # via kubernetes # via requests -visions==0.7.5 +uvicorn==0.30.6 + # via prefect +visions==0.7.6 # via viadot2 -wcwidth==0.2.13 - # via prompt-toolkit -webencodings==0.5.1 - # via bleach - # via tinycss2 -websocket-client==1.7.0 - # via jupyter-server -wrapt==1.16.0 - # via aiobotocore -yarl==1.9.4 - # via aiohttp -zeep==4.2.1 - # via simple-salesforce +websocket-client==1.8.0 + # via kubernetes +websockets==12.0 + # via prefect diff --git a/src/viadot/__init__.py b/src/viadot/__init__.py index 439c85a47..6c5389e07 100644 --- a/src/viadot/__init__.py +++ b/src/viadot/__init__.py @@ -1,5 +1,8 @@ +"""The viadot library.""" + import logging + # Remove trash Azure INFO logs which contain low-level debugging information # but keep WARNING and higher ones in case something actually important happens. diff --git a/src/viadot/config.py b/src/viadot/config.py index 988266f0c..7b1bf5a6a 100644 --- a/src/viadot/config.py +++ b/src/viadot/config.py @@ -1,55 +1,74 @@ +"""Viadot config.""" + from __future__ import annotations import json import logging -from os.path import expanduser, join -from typing import Optional +from pathlib import Path +from typing import Any from yaml import safe_load + logger = logging.getLogger(__name__) -USER_HOME = expanduser("~") +USER_HOME = Path.home() class Config(dict): @classmethod - def _get_configuration( - cls, config: dict, key: Optional[str] = None - ) -> Optional[dict]: + def _get_configuration(cls, config: dict, key: str | None = None) -> dict | None: # Empty config. if not config: - return + return None # Config key does not exist or has no values. if key: config = config.get(key) if not config: logger.warning(f"No configuration found under the '{key}' config key.") - return + return None return cls(**config) @classmethod - def from_json(cls, path: str, key: Optional[str] = None) -> Config: - with open(path) as f: + def from_json(cls, path: str | Path, key: str | None = None) -> Config: + """Create a Config object from a JSON file. + + Args: + path (str): The path to the JSON file. + key (str | None, optional): The key inside the JSON. Defaults to None. + + Returns: + Config: The Config object. + """ + with Path(path).open() as f: config = json.load(f) return cls._get_configuration(config, key=key) @classmethod - def from_yaml(cls, path: str, key: Optional[str] = None) -> Config: - with open(path) as f: + def from_yaml(cls, path: str | Path, key: str | None = None) -> Config: + """Create a Config object from a YAML file. + + Args: + path (str): The path to the YAML file. + key (str | None, optional): The key inside the YAML. Defaults to None. + + Returns: + Config: The Config object. 
+ """ + with Path(path).open() as f: config = safe_load(stream=f) return cls._get_configuration(config, key=key) -config_dir = join(USER_HOME, ".config", "viadot") +config_dir = Path(USER_HOME) / ".config" / "viadot" try: - CONFIG = Config.from_yaml(join(config_dir, "config.yaml")) + CONFIG = Config.from_yaml(config_dir / "config.yaml") except FileNotFoundError: try: - CONFIG = Config.from_json(join(config_dir, "config.json")) + CONFIG = Config.from_json(config_dir / "config.json") except FileNotFoundError: CONFIG = Config() except ValueError: @@ -58,15 +77,37 @@ def from_yaml(cls, path: str, key: Optional[str] = None) -> Config: CONFIG = Config() -def get_source_config(key, config=CONFIG): +def get_source_config(key: str, config: Config = CONFIG) -> dict[str, Any] | None: + """Retrieve source configuration. + + Args: + key (str): The key inside the config to look for. + config (Config, optional): The config object to extract from. Defaults to + CONFIG. + + Returns: + dict[str, Any]: Source configuration. + """ source_configs = config.get("sources") if source_configs is not None: for source_config in source_configs: - if key in source_config.keys(): + if key in source_config: return source_configs[source_configs.index(source_config)][key] + return None + + +def get_source_credentials(key: str, config: Config = CONFIG) -> dict[str, Any] | None: + """Retrieve source credentials from the provided config. + Args: + key (str): The key inside the config to look for. + config (Config, optional): The config object to extract from. Defaults to + CONFIG. -def get_source_credentials(key, config=CONFIG): - config = get_source_config(key, config) - if config is not None: - return config.get("credentials") + Returns: + dict[str, Any]: Source credentials. + """ + source_config = get_source_config(key, config) + if source_config is not None: + return source_config.get("credentials") + return None diff --git a/src/viadot/examples/__init__.py b/src/viadot/examples/__init__.py index e69de29bb..db7eb126c 100644 --- a/src/viadot/examples/__init__.py +++ b/src/viadot/examples/__init__.py @@ -0,0 +1 @@ +"""Examples of viadot usage.""" diff --git a/src/viadot/examples/sap_rfc/README.md b/src/viadot/examples/sap_rfc/README.md index 85b7aa04b..a1d4732a4 100644 --- a/src/viadot/examples/sap_rfc/README.md +++ b/src/viadot/examples/sap_rfc/README.md @@ -8,7 +8,7 @@ Note that we refer to a `sap_netweaver_rfc` folder in the Dockerfile. This is th Clone the viadot, enter the sap_rfc folder, and build the image: -``` +```console git clone --branch 2.0 https://github.com/dyvenia/viadot.git && \ cd viadot/viadot/examples/sap_rfc && \ docker build -t viadot:sap_rfc . --no-cache @@ -16,8 +16,8 @@ docker build -t viadot:sap_rfc . --no-cache Spin it up with the provided `docker-compose` -``` -docker-compose up -d +```console +docker compose -f docker/docker-compose.yml up -d ``` You can now open up Jupyter Lab at `localhost:5678`. @@ -26,11 +26,10 @@ You can now open up Jupyter Lab at `localhost:5678`. Credentials and other settings are stored as `~/.config/viadot/config.yaml`. A config file needs to be written in yaml format. 
A typical config file looks like so: -``` +```yaml version: 1 sources: - - sharepoint_prod: class: Sharepoint credentials: @@ -43,6 +42,6 @@ sources: To run tests, run pytest: -``` +```console docker exec -it viadot_saprfc_lab pytest tests/integration/test_sap_rfc.py ``` diff --git a/src/viadot/exceptions.py b/src/viadot/exceptions.py index 6000e5bca..f3d7b818b 100644 --- a/src/viadot/exceptions.py +++ b/src/viadot/exceptions.py @@ -1,3 +1,6 @@ +"""Viadot exceptions.""" + + class ValidationError(Exception): pass @@ -14,57 +17,53 @@ class DBDataAccessError(Exception): pass -class TableDoesNotExist(Exception): +class TableDoesNotExistError(Exception): def __init__( self, - message: str = None, - table: str = None, - schema: str = None, - fqn: str = None, + message: str | None = None, + table: str | None = None, + schema: str | None = None, + fqn: str | None = None, ): - """ - Raise when a database table does not exist. + """Raise when a database table does not exist. Args: table (str, optional): The name of the table. Defaults to None. - schema (str, optional): The schema where the table is located. Defaults to None. - fqn (str, optional): The fully-qualified name of the table. Defaults to None. + schema (str, optional): The schema where the table is located. Defaults to + None. + fqn (str, optional): The fully-qualified name of the table. Defaults to + None. """ - if table and schema: - fqn = f"{schema}.{table}" - else: - fqn = fqn or table + fqn = f"{schema}.{table}" if table and schema else fqn or table message = message or f"Table {fqn} does not exist." super().__init__(message) -class TableAlreadyExists(Exception): +class TableAlreadyExistsError(Exception): def __init__( self, - message: str = None, - table: str = None, - schema: str = None, - fqn: str = None, + message: str | None = None, + table: str | None = None, + schema: str | None = None, + fqn: str | None = None, ): - """ - Raise when a database table already exists. + """Raise when a database table already exists. Args: table (str, optional): The name of the table. Defaults to None. - schema (str, optional): The schema where the table is located. Defaults to None. - fqn (str, optional): The fully-qualified name of the table. Defaults to None. + schema (str, optional): The schema where the table is located. Defaults to + None. + fqn (str, optional): The fully-qualified name of the table. Defaults to + None. """ - if table and schema: - fqn = f"{schema}.{table}" - else: - fqn = fqn or table + fqn = f"{schema}.{table}" if table and schema else fqn or table message = message or f"Table {fqn} already exists." 
super().__init__(message) -class DataBufferExceeded(Exception): +class DataBufferExceededError(Exception): pass diff --git a/src/viadot/orchestration/__init__.py b/src/viadot/orchestration/__init__.py new file mode 100644 index 000000000..9b36ba45c --- /dev/null +++ b/src/viadot/orchestration/__init__.py @@ -0,0 +1 @@ +"""Data orchestration utilities.""" diff --git a/src/viadot/orchestration/prefect/__init__.py b/src/viadot/orchestration/prefect/__init__.py new file mode 100644 index 000000000..8785e40c5 --- /dev/null +++ b/src/viadot/orchestration/prefect/__init__.py @@ -0,0 +1 @@ +"""Prefect tasks and flows for data orchestration.""" diff --git a/src/viadot/orchestration/prefect/exceptions.py b/src/viadot/orchestration/prefect/exceptions.py new file mode 100644 index 000000000..4cff4ee5b --- /dev/null +++ b/src/viadot/orchestration/prefect/exceptions.py @@ -0,0 +1,26 @@ +"""Exceptions to raise in case of issues while configuring or running viadot.""" + + +class MissingSourceCredentialsError(Exception): + """Raise when no source credentials were provided. + + Args: + message (str, optional): A custom message to pass. + """ + + def __init__(self, message: str | None = None) -> None: + """Override the default message.""" + default_message = "Either `credentials_secret`, `config_key`, or `credentials` has to be specified and not empty." + message = message or default_message + + super().__init__(message) + + +class MissingPrefectBlockError(Exception): + """Raise when a Prefect block is not found. + + Args: + message (str, optional): A custom message to pass. + """ + + pass diff --git a/src/viadot/orchestration/prefect/flows/__init__.py b/src/viadot/orchestration/prefect/flows/__init__.py new file mode 100644 index 000000000..9193fb2ed --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/__init__.py @@ -0,0 +1,46 @@ +"""Import flows.""" + +from .cloud_for_customers_to_adls import cloud_for_customers_to_adls +from .cloud_for_customers_to_databricks import cloud_for_customers_to_databricks +from .duckdb_to_parquet import duckdb_to_parquet +from .duckdb_to_sql_server import duckdb_to_sql_server +from .duckdb_transform import duckdb_transform +from .exchange_rates_to_adls import exchange_rates_to_adls +from .exchange_rates_to_databricks import exchange_rates_to_databricks +from .genesys_to_adls import genesys_to_adls +from .hubspot_to_adls import hubspot_to_adls +from .mindful_to_adls import mindful_to_adls +from .outlook_to_adls import outlook_to_adls +from .sap_to_parquet import sap_to_parquet +from .sap_to_redshift_spectrum import sap_to_redshift_spectrum +from .sharepoint_to_adls import sharepoint_to_adls +from .sharepoint_to_databricks import sharepoint_to_databricks +from .sharepoint_to_redshift_spectrum import sharepoint_to_redshift_spectrum +from .sharepoint_to_s3 import sharepoint_to_s3 +from .sql_server_to_minio import sql_server_to_minio +from .transform import transform +from .transform_and_catalog import transform_and_catalog + + +__all__ = [ + "cloud_for_customers_to_adls", + "cloud_for_customers_to_databricks", + "duckdb_to_parquet", + "duckdb_to_sql_server", + "duckdb_transform", + "exchange_rates_to_adls", + "exchange_rates_to_databricks", + "genesys_to_adls", + "hubspot_to_adls", + "mindful_to_adls", + "outlook_to_adls", + "sap_to_parquet", + "sap_to_redshift_spectrum", + "sharepoint_to_adls", + "sharepoint_to_databricks", + "sharepoint_to_redshift_spectrum", + "sharepoint_to_s3", + "sql_server_to_minio", + "transform", + "transform_and_catalog", +] diff --git 
a/src/viadot/orchestration/prefect/flows/cloud_for_customers_to_adls.py b/src/viadot/orchestration/prefect/flows/cloud_for_customers_to_adls.py new file mode 100644 index 000000000..43e927ce6 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/cloud_for_customers_to_adls.py @@ -0,0 +1,73 @@ +"""Flow for pulling data from CloudForCustomers to Adls.""" + +from typing import Any + +from prefect import flow + +from viadot.orchestration.prefect.tasks import ( + cloud_for_customers_to_df, + df_to_adls, +) + + +@flow +def cloud_for_customers_to_adls( # noqa: PLR0913 + # C4C + cloud_for_customers_url: str | None = None, + fields: list[str] | None = None, + dtype: dict[str, Any] | None = None, + endpoint: str | None = None, + report_url: str | None = None, + filter_params: dict[str, Any] | None = None, + # ADLS + adls_path: str | None = None, + overwrite: bool = False, + # Auth + cloud_for_customers_credentials_secret: str | None = None, + cloud_for_customers_config_key: str | None = None, + adls_credentials_secret: str | None = None, + adls_config_key: str | None = None, + **kwargs: dict[str, Any] | None, +) -> None: + """Download records from SAP Cloud for Customers and upload them to Azure Data Lake. + + Args: + cloud_for_customers_url (str): The URL to the C4C API. For example, + 'https://myNNNNNN.crm.ondemand.com/c4c/v1/'. + fields (list[str], optional): List of fields to put in DataFrame. + dtype (dict, optional): The dtypes to use in the DataFrame. + endpoint (str, optional): The API endpoint. + report_url (str, optional): The API url in case of prepared report. + filter_params (dict[str, Any], optional): Query parameters. + adls_path (str): The destination path. + overwrite (bool, optional): Whether to overwrite files in the lake. Defaults to + False. + cloud_for_customers_credentials_secret (str, optional): The name of the Azure + Key Vault secret storing the C4C credentials. Defaults to None. + cloud_for_customers_config_key (str, optional): The key in the viadot config + holding relevant credentials. Defaults to None. + adls_credentials_secret (str, optional): The name of the Azure Key Vault secret + storing the ADLS credentials. Defaults to None. + adls_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + kwargs: The parameters to pass to the DataFrame constructor. 
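For orientation, a minimal call to this flow might look like the sketch below. The tenant URL, endpoint, ADLS path, and config keys are illustrative placeholders, not settings shipped with this PR; only the flow name and parameters come from the signature above.

```python
# Illustrative only: all values below are hypothetical placeholders.
from viadot.orchestration.prefect.flows import cloud_for_customers_to_adls

cloud_for_customers_to_adls(
    cloud_for_customers_url="https://my000000.crm.ondemand.com/c4c/v1/",  # hypothetical C4C tenant
    endpoint="ServiceRequestCollection",  # hypothetical API endpoint
    adls_path="raw/c4c/service_requests.parquet",  # hypothetical lake path
    cloud_for_customers_config_key="c4c_dev",  # assumes this key exists in the viadot config
    adls_config_key="adls_dev",
    overwrite=True,
)
```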
+ """ + df = cloud_for_customers_to_df( + url=cloud_for_customers_url, + fields=fields, + dtype=dtype, + endpoint=endpoint, + report_url=report_url, + credentials_secret=cloud_for_customers_credentials_secret, + config_key=cloud_for_customers_config_key, + filter_params=filter_params, + **kwargs, + ) + + return df_to_adls( + df=df, + path=adls_path, + credentials_secret=adls_credentials_secret, + config_key=adls_config_key, + overwrite=overwrite, + ) diff --git a/src/viadot/orchestration/prefect/flows/cloud_for_customers_to_databricks.py b/src/viadot/orchestration/prefect/flows/cloud_for_customers_to_databricks.py new file mode 100644 index 000000000..c8d066364 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/cloud_for_customers_to_databricks.py @@ -0,0 +1,76 @@ +"""Flow for pulling data from CloudForCustomers to Databricks.""" + +from typing import Any, Literal + +from prefect import flow + +from viadot.orchestration.prefect.tasks import ( + cloud_for_customers_to_df, + df_to_databricks, +) + + +@flow +def cloud_for_customers_to_databricks( # noqa: PLR0913 + # C4C + cloud_for_customers_url: str, + fields: list[str] | None = None, + dtype: dict[str, Any] | None = None, + endpoint: str | None = None, + report_url: str | None = None, + filter_params: dict[str, Any] | None = None, + # Databricks + databricks_table: str | None = None, + databricks_schema: str | None = None, + if_exists: Literal["replace", "skip", "fail"] = "fail", + # Auth + cloud_for_customers_credentials_secret: str | None = None, + cloud_for_customers_config_key: str | None = None, + databricks_credentials_secret: str | None = None, + databricks_config_key: str | None = None, + **kwargs: dict[str, Any] | None, +) -> None: + """Download a file from SAP Cloud for Customers and upload it to Azure Data Lake. + + Args: + cloud_for_customers_url (str): The URL to the C4C API. For example, + 'https://myNNNNNN.crm.ondemand.com/c4c/v1/'. + fields (list[str], optional): List of fields to put in DataFrame. + dtype (dict, optional): The dtypes to use in the DataFrame. + endpoint (str, optional): The API endpoint. + report_url (str, optional): The API url in case of prepared report. + filter_params (dict[str, Any], optional): Query parameters. + databricks_table (str): The name of the target table. + databricks_schema (str, optional): The name of the target schema. + if_exists (str, Optional): What to do if the table already exists. One of + 'replace', 'skip', and 'fail'. + cloud_for_customers_credentials_secret (str, optional): The name of the Azure + Key Vault secret storing the C4C credentials. Defaults to None. + cloud_for_customers_config_key (str, optional): The key in the viadot config + holding relevant credentials. Defaults to None. + databricks_credentials_secret (str, optional): The name of the Azure Key Vault + secret storing relevant credentials. Defaults to None. + databricks_config_key (str, optional): The key in the viadot config holding + relevant credentials. Defaults to None. + kwargs: The parameters to pass to the DataFrame constructor. 
+ """ + df = cloud_for_customers_to_df( + url=cloud_for_customers_url, + fields=fields, + dtype=dtype, + endpoint=endpoint, + report_url=report_url, + credentials_secret=cloud_for_customers_credentials_secret, + config_key=cloud_for_customers_config_key, + filter_params=filter_params, + **kwargs, + ) + + return df_to_databricks( + df=df, + schema=databricks_schema, + table=databricks_table, + if_exists=if_exists, + credentials_secret=databricks_credentials_secret, + config_key=databricks_config_key, + ) diff --git a/src/viadot/orchestration/prefect/flows/duckdb_to_parquet.py b/src/viadot/orchestration/prefect/flows/duckdb_to_parquet.py new file mode 100644 index 000000000..41089f784 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/duckdb_to_parquet.py @@ -0,0 +1,57 @@ +"""Flow for extracting data from the DuckDB to a Parquet file.""" + +from typing import Any, Literal + +from prefect import flow + +from viadot.orchestration.prefect.tasks import duckdb_query +from viadot.orchestration.prefect.tasks.task_utils import df_to_parquet + + +@flow( + name="extract--duckdb--parquet", + description="Extract data from DuckDB and save it to Parquet file.", + retries=1, + retry_delay_seconds=60, + timeout_seconds=2 * 60 * 60, +) +def duckdb_to_parquet( + query: str, + path: str, + if_exists: Literal["append", "replace", "skip"] = "replace", + duckdb_credentials_secret: str | None = None, + # Specifying credentials in a dictionary is not recommended in the viadot flows, + # but in this case credentials can include only database name. + duckdb_credentials: dict[str, Any] | None = None, + duckdb_config_key: str | None = None, +) -> None: + """Download a table from DuckDB and save it to a Parquet file. + + Args: + query (str, required): The query to execute on the DuckDB database. If the query + doesn't start with "SELECT", returns an empty DataFrame. + path (str): Path where to save a Parquet file which will be created while + executing flow. + if_exists (Literal, optional): What to do if the file exists. Defaults to + "replace". + duckdb_credentials_secret (str, optional): The name of the secret storing + the credentials to the DuckDB. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + duckdb_credentials (dict[str, Any], optional): Credentials to the DuckDB. + Defaults to None. + duckdb_config_key (str, optional): The key in the viadot config holding relevant + credentials to the DuckDB. Defaults to None. 
+ + """ + df = duckdb_query( + query=query, + fetch_type="dataframe", + credentials=duckdb_credentials, + config_key=duckdb_config_key, + credentials_secret=duckdb_credentials_secret, + ) + return df_to_parquet( + df=df, + path=path, + if_exists=if_exists, + ) diff --git a/src/viadot/orchestration/prefect/flows/duckdb_to_sql_server.py b/src/viadot/orchestration/prefect/flows/duckdb_to_sql_server.py new file mode 100644 index 000000000..188441d48 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/duckdb_to_sql_server.py @@ -0,0 +1,121 @@ +"""Flow for extracting data from the DuckDB into SQLServer.""" + +from pathlib import Path +from typing import Any, Literal + +from prefect import flow, task +from prefect.logging import get_run_logger + +from viadot.orchestration.prefect.tasks import ( + bcp, + create_sql_server_table, + duckdb_query, +) +from viadot.orchestration.prefect.tasks.task_utils import ( + df_to_csv, + get_sql_dtypes_from_df, +) + + +@task(timeout_seconds=60 * 60) +def cleanup_csv_task(path: str) -> None: + """Remove a CSV file from the local filesystem.""" + logger = get_run_logger() + + logger.info(f"Removing file {path}...") + try: + Path(path).unlink() + logger.info(f"File {path} has been successfully removed.") + except Exception: + logger.exception(f"File {path} could not be removed.") + + +@flow( + name="extract--duckdb--sql_server", + description="Extract data from DuckDB and save it in the SQLServer", + retries=1, + retry_delay_seconds=60, + timeout_seconds=2 * 60 * 60, +) +def duckdb_to_sql_server( # noqa: PLR0913 + query: str, + local_path: str, + db_table: str, + db_schema: str, + if_exists: Literal["fail", "replace", "skip", "delete"] = "replace", + dtypes: dict[str, Any] | None = None, + chunksize: int = 5000, + error_log_file_path: str = "./log_file.log", + on_error: Literal["skip", "fail"] = "skip", + duckdb_credentials_secret: str | None = None, + # Specifying credentials in a dictionary is not recommended in the viadot flows, + # but in this case credentials can include only database name. + duckdb_credentials: dict[str, Any] | None = None, + duckdb_config_key: str | None = None, + sql_server_credentials_secret: str | None = None, + sql_server_config_key: str | None = None, +) -> None: + """Download a table from DuckDB and upload it to the SQLServer. + + Args: + query (str, required): The query to execute on the SQL Server database. + If the qery doesn't start with "SELECT" returns an empty DataFrame. + local_path (str): Where to store the CSV data dump used for bulk upload to SQL + Server. + db_table (str, optional): Destination table. Defaults to None. + db_schema (str, optional): Destination schema. Defaults to None. + if_exists (Literal, optional): What to do if the table exists. Defaults to + "replace". + dtypes (dict, optional): The data types to be enforced for the resulting table. + By default, inferred from the DataFrame. Defaults to None. + chunksize (int, optional): Size of a chunk to use in the bcp function. + Defaults to 5000. + error_log_file_path (string, optional): Full path of an error file. Defaults + to "./log_file.log". + on_error (str, optional): What to do in case of a bcp error. Defaults to "skip". + duckdb_credentials_secret (str, optional): The name of the secret storing + the credentials to the DuckDB. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + duckdb_credentials (dict[str, Any], optional): Credentials to the DuckDB. + Defaults to None. 
+ duckdb_config_key (str, optional): The key in the viadot config holding relevant + credentials to the DuckDB. Defaults to None. + sql_server_credentials_secret (str, optional): The name of the secret storing + the credentials to the SQLServer. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + sql_server_config_key (str, optional): The key in the viadot config holding + relevant credentials to the SQLServer. Defaults to None. + + """ + df = duckdb_query( + query=query, + fetch_type="dataframe", + credentials=duckdb_credentials, + config_key=duckdb_config_key, + credentials_secret=duckdb_credentials_secret, + ) + if dtypes is None: + dtypes = get_sql_dtypes_from_df(df) + + create_sql_server_table( + table=db_table, + schema=db_schema, + dtypes=dtypes, + if_exists=if_exists, + credentials_secret=sql_server_credentials_secret, + config_key=sql_server_config_key, + ) + df_to_csv(df=df, path=local_path) + + bcp( + path=local_path, + schema=db_schema, + table=db_table, + chunksize=chunksize, + error_log_file_path=error_log_file_path, + on_error=on_error, + credentials_secret=sql_server_credentials_secret, + config_key=sql_server_config_key, + ) + + cleanup_csv_task(path=local_path) diff --git a/src/viadot/orchestration/prefect/flows/duckdb_transform.py b/src/viadot/orchestration/prefect/flows/duckdb_transform.py new file mode 100644 index 000000000..3d31349cf --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/duckdb_transform.py @@ -0,0 +1,42 @@ +"""Flow for transforming data in DuckDB.""" + +from typing import Any + +from prefect import flow + +from viadot.orchestration.prefect.tasks import duckdb_query + + +@flow( + name="transform--duckdb", + description="Transform data in the DuckDB.", + retries=1, + retry_delay_seconds=60, + timeout_seconds=2 * 60 * 60, +) +def duckdb_transform( + query: str, + duckdb_credentials_secret: str | None = None, + # Specifying credentials in a dictionary is not recommended in the viadot flows, + # but in this case credentials can include only database name. + duckdb_credentials: dict[str, Any] | None = None, + duckdb_config_key: str | None = None, +) -> None: + """Transform data inside DuckDB database. + + Args: + query (str, required): The query to execute on the DuckDB database. + duckdb_credentials_secret (str, optional): The name of the secret storing + the credentials. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + duckdb_credentials (dict[str, Any], optional): Credentials to the database. + Defaults to None. + duckdb_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. 
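A hypothetical call to the `duckdb_transform` flow declared here; the SQL statement and the in-line credentials (which, per the parameter comment, may carry only the database location) are placeholders.

```python
from viadot.orchestration.prefect.flows.duckdb_transform import duckdb_transform

# Run an in-database transformation inside DuckDB.
duckdb_transform(
    query="CREATE OR REPLACE TABLE staging.orders AS SELECT * FROM raw.orders",
    duckdb_credentials={"database": "/data/duckdb/analytics.db"},  # placeholder
)
```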
+ """ + duckdb_query( + query=query, + credentials=duckdb_credentials, + config_key=duckdb_config_key, + credentials_secret=duckdb_credentials_secret, + ) diff --git a/src/viadot/orchestration/prefect/flows/exchange_rates_to_adls.py b/src/viadot/orchestration/prefect/flows/exchange_rates_to_adls.py new file mode 100644 index 000000000..b8c234d17 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/exchange_rates_to_adls.py @@ -0,0 +1,77 @@ +"""Flows for pulling data from Exchange rates API to Azure Data Lake.""" + +from datetime import datetime +from typing import Literal + +from prefect import flow + +from viadot.orchestration.prefect.tasks import df_to_adls, exchange_rates_to_df + + +Currency = Literal[ + "USD", "EUR", "GBP", "CHF", "PLN", "DKK", "COP", "CZK", "SEK", "NOK", "ISK" +] + + +@flow( + name="extract--exchange-rates-api--adls", + description="Extract data from Exchange Rates API and load it into Azure Data Lake.", + retries=1, + retry_delay_seconds=60, +) +def exchange_rates_to_adls( + adls_path: str, + overwrite: bool = False, + currency: Currency = "USD", + start_date: str = datetime.today().strftime("%Y-%m-%d"), + end_date: str = datetime.today().strftime("%Y-%m-%d"), + symbols: list[str] | None = None, + exchange_rates_credentials_secret: str | None = None, + exchange_rates_config_key: str | None = None, + adls_credentials_secret: str | None = None, + adls_config_key: str | None = None, +) -> None: + """Download a DataFrame from ExchangeRates API and upload it to Azure Data Lake. + + Args: + adls_path (str): The destination path. + overwrite (bool, optional): Whether to overwrite files in the lake. + Defaults to False. + currency (Currency, optional): Base currency to which prices of searched + currencies are related. Defaults to "USD". + start_date (str, optional): Initial date for data search. + Data range is start_date -> end_date, + supported format 'yyyy-mm-dd'. + Defaults to datetime.today().strftime("%Y-%m-%d"). + end_date (str, optional): See above. + Defaults to datetime.today().strftime("%Y-%m-%d"). + symbols (List[str], optional): List of currencies for which + exchange rates from base currency will be fetched. + Defaults to + ["USD","EUR","GBP","CHF","PLN","DKK","COP","CZK","SEK","NOK","ISK"]. + exchange_rates_credentials_secret (str, optional): The name of the + Azure Key Vault secret storing the exchange rates credentials. + Defaults to None. + exchange_rates_config_key (str, optional): The key in the viadot config holding + relevant credentials. Defaults to None. + adls_credentials_secret (str, optional): The name of the Azure Key Vault secret + storing the ADLS credentials. Defaults to None. + adls_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. 
+ """ + df = exchange_rates_to_df( + currency=currency, + credentials_secret=exchange_rates_credentials_secret, + config_key=exchange_rates_config_key, + start_date=start_date, + end_date=end_date, + symbols=symbols, + ) + + return df_to_adls( + df=df, + path=adls_path, + credentials_secret=adls_credentials_secret, + config_key=adls_config_key, + overwrite=overwrite, + ) diff --git a/src/viadot/orchestration/prefect/flows/exchange_rates_to_databricks.py b/src/viadot/orchestration/prefect/flows/exchange_rates_to_databricks.py new file mode 100644 index 000000000..2c4b9e7f0 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/exchange_rates_to_databricks.py @@ -0,0 +1,83 @@ +"""Flows for pulling data from Exchange rates API to Databricks.""" + +from datetime import datetime +from typing import Literal + +from prefect import flow + +from viadot.orchestration.prefect.tasks import df_to_databricks, exchange_rates_to_df + + +Currency = Literal[ + "USD", "EUR", "GBP", "CHF", "PLN", "DKK", "COP", "CZK", "SEK", "NOK", "ISK" +] + + +@flow( + name="extract--exchange-rates-api--databricks", + description="Extract data from Exchange Rates API and load it into Databricks.", + retries=1, + retry_delay_seconds=60, +) +def exchange_rates_to_databricks( # noqa: PLR0913 + databricks_table: str, + databricks_schema: str | None = None, + if_exists: Literal["replace", "skip", "fail"] = "fail", + currency: Currency = "USD", + start_date: str = datetime.today().strftime("%Y-%m-%d"), + end_date: str = datetime.today().strftime("%Y-%m-%d"), + symbols: list[str] | None = None, + exchange_rates_credentials_secret: str | None = None, + exchange_rates_config_key: str | None = None, + databricks_credentials_secret: str | None = None, + databricks_config_key: str | None = None, +) -> None: + """Download a DataFrame from ExchangeRates API and upload it to Databricks. + + Args: + databricks_table (str): The name of the target table. + databricks_schema (str, optional): The name of the target schema. + Defaults to None. + if_exists (Literal["replace", "skip", "fail"], optional): + What to do if the table already exists. + One of "replace", "skip", and "fail". Defaults to "fail". + currency (Currency, optional): Base currency to which prices of searched + currencies are related. Defaults to "USD". + start_date (str, optional): Initial date for data search. + Data range is start_date -> end_date, + supported format 'yyyy-mm-dd'. + Defaults to datetime.today().strftime("%Y-%m-%d"). + end_date (str, optional): See above. + Defaults to datetime.today().strftime("%Y-%m-%d"). + symbols (List[str], optional): List of currencies for which + exchange rates from base currency will be fetched. + Defaults to + ["USD","EUR","GBP","CHF","PLN","DKK","COP","CZK","SEK","NOK","ISK"]. + Only ISO codes. + exchange_rates_credentials_secret (str, optional): The name of the + Azure Key Vault secret storing the exchange rates credentials. + Defaults to None. + exchange_rates_config_key (str, optional): The key in the viadot config holding + relevant credentials. Defaults to None. + databricks_credentials_secret (str, optional): The name of the Azure Key Vault + secret storing relevant credentials. Defaults to None. + databricks_config_key (str, optional): The key in the viadot config holding + relevant credentials. Defaults to None. 
+ """ + df = exchange_rates_to_df( + currency=currency, + credentials_secret=exchange_rates_credentials_secret, + config_key=exchange_rates_config_key, + start_date=start_date, + end_date=end_date, + symbols=symbols, + ) + + return df_to_databricks( + df=df, + schema=databricks_schema, + table=databricks_table, + if_exists=if_exists, + credentials_secret=databricks_credentials_secret, + config_key=databricks_config_key, + ) diff --git a/src/viadot/orchestration/prefect/flows/genesys_to_adls.py b/src/viadot/orchestration/prefect/flows/genesys_to_adls.py new file mode 100644 index 000000000..9575f1634 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/genesys_to_adls.py @@ -0,0 +1,113 @@ +"""Download data from Genesys Cloud and upload it to Azure Data Lake Storage.""" + +import time +from typing import Any + +from prefect import flow +from prefect.task_runners import ConcurrentTaskRunner + +from viadot.orchestration.prefect.tasks import df_to_adls, genesys_to_df + + +@flow( + name="Genesys extraction to ADLS", + description="Extract data from Genesys Cloud" + + " and load it into Azure Data Lake Storage.", + retries=1, + retry_delay_seconds=60, + task_runner=ConcurrentTaskRunner, + log_prints=True, +) +def genesys_to_adls( # noqa: PLR0913 + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + verbose: bool | None = None, + endpoint: str | None = None, + environment: str = "mypurecloud.de", + queues_ids: list[str] | None = None, + view_type: str | None = None, + view_type_time_sleep: int | None = None, + post_data_list: list[dict[str, Any]] | None = None, + normalization_sep: str = ".", + drop_duplicates: bool = False, + validate_df_dict: dict[str, Any] | None = None, + adls_config_key: str | None = None, + adls_azure_key_vault_secret: str | None = None, + adls_path: str | None = None, + adls_path_overwrite: bool = False, +) -> None: + """Flow for downloading data from mindful to Azure Data Lake. + + Args: + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret where credentials are stored. Defaults to None. + verbose (bool, optional): Increase the details of the logs printed on the + screen. Defaults to False. + endpoint (Optional[str], optional): Final end point to the API. + Defaults to None. + environment (str, optional): the domain that appears for Genesys Cloud + Environment based on the location of your Genesys Cloud organization. + Defaults to "mypurecloud.de". + queues_ids (Optional[List[str]], optional): List of queues ids to consult the + members. Defaults to None. + view_type (Optional[str], optional): The type of view export job to be created. + Defaults to None. + view_type_time_sleep (Optional[int], optional): Waiting time to retrieve data + from Genesys Cloud API. Defaults to None. + post_data_list (Optional[List[Dict[str, Any]]], optional): List of string + templates to generate json body in POST calls to the API. Defaults to None. + normalization_sep (str, optional): Nested records will generate names separated + by sep. Defaults to ".". + drop_duplicates (bool, optional): Remove duplicates from the DataFrame. + Defaults to False. + validate_df_dict (Optional[Dict[str, Any]], optional): A dictionary with + optional list of tests to verify the output dataframe. Defaults to None. + adls_config_key (Optional[str], optional): The key in the viadot config holding + relevant credentials. Defaults to None. 
+ adls_azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret containing a dictionary with ACCOUNT_NAME and Service Principal + credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. + Defaults to None. + adls_path (Optional[str], optional): Azure Data Lake destination file path (with + file name). Defaults to None. + adls_path_overwrite (bool, optional): Whether to overwrite the file in ADLS. + Defaults to True. + + Examples: + genesys_to_adls( + config_key=config_key, + verbose=False, + endpoint=endpoint, + post_data_list=data_to_post, + adls_config_key=adls_config_key, + adls_path=adls_path, + adls_path_overwrite=True, + ) + """ + data_frame = genesys_to_df( + config_key=config_key, + azure_key_vault_secret=azure_key_vault_secret, + verbose=verbose, + endpoint=endpoint, + environment=environment, + queues_ids=queues_ids, + view_type=view_type, + view_type_time_sleep=view_type_time_sleep, + post_data_list=post_data_list, + normalization_sep=normalization_sep, + drop_duplicates=drop_duplicates, + validate_df_dict=validate_df_dict, + ) + + # ??? + time.sleep(0.5) + + return df_to_adls( + df=data_frame, + path=adls_path, + credentials_secret=adls_azure_key_vault_secret, + config_key=adls_config_key, + overwrite=adls_path_overwrite, + ) diff --git a/src/viadot/orchestration/prefect/flows/hubspot_to_adls.py b/src/viadot/orchestration/prefect/flows/hubspot_to_adls.py new file mode 100644 index 000000000..ff5149c1e --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/hubspot_to_adls.py @@ -0,0 +1,81 @@ +"""Download data from Hubspot API and load into Azure Data Lake Storage.""" + +from typing import Any + +from prefect import flow +from prefect.task_runners import ConcurrentTaskRunner + +from viadot.orchestration.prefect.tasks import df_to_adls, hubspot_to_df + + +@flow( + name="Hubspot extraction to ADLS", + description="Extract data from Hubspot API and load into Azure Data Lake Storage.", + retries=1, + retry_delay_seconds=60, + task_runner=ConcurrentTaskRunner, +) +def hubspot_to_adls( + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + endpoint: str | None = None, + filters: list[dict[str, Any]] | None = None, + properties: list[Any] | None = None, + nrows: int = 1000, + adls_config_key: str | None = None, + adls_azure_key_vault_secret: str | None = None, + adls_path: str | None = None, + adls_path_overwrite: bool = False, +) -> None: + """Flow for downloading data from mindful to Azure Data Lake. + + Args: + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret where credentials are stored. Defaults to None. + endpoint (Optional[str], optional): API endpoint for an individual request. + Defaults to None. + filters (Optional[List[Dict[str, Any]]], optional): Filters defined for the API + body in specific order. Defaults to None. + properties (Optional[List[Any]], optional): List of user-defined columns to be + pulled from the API. Defaults to None. + nrows (int, optional): Max number of rows to pull during execution. + Defaults to 1000. + adls_config_key (Optional[str], optional): The key in the viadot config holding + relevant credentials. Defaults to None. 
+ adls_azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret containing a dictionary with ACCOUNT_NAME and Service Principal + credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. + Defaults to None. + adls_path (Optional[str], optional): Azure Data Lake destination file path + (with file name). Defaults to None. + adls_path_overwrite (bool, optional): Whether to overwrite the file in ADLS. + Defaults to True. + + Examples: + hubspot_to_adls( + config_key=config_key, + endpoint=endpoint, + nrows=nrows, + adls_config_key=adls_config_key, + adls_path=adls_path, + adls_path_overwrite=True, + ) + """ + data_frame = hubspot_to_df( + config_key=config_key, + azure_key_vault_secret=azure_key_vault_secret, + endpoint=endpoint, + filters=filters, + properties=properties, + nrows=nrows, + ) + + return df_to_adls( + df=data_frame, + path=adls_path, + credentials_secret=adls_azure_key_vault_secret, + config_key=adls_config_key, + overwrite=adls_path_overwrite, + ) diff --git a/src/viadot/orchestration/prefect/flows/mindful_to_adls.py b/src/viadot/orchestration/prefect/flows/mindful_to_adls.py new file mode 100644 index 000000000..a607383bf --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/mindful_to_adls.py @@ -0,0 +1,99 @@ +"""Download data from Mindful API and load it into Azure Data Lake Storage.""" + +from datetime import date +import time +from typing import Literal + +from prefect import flow +from prefect.task_runners import ConcurrentTaskRunner + +from viadot.orchestration.prefect.tasks import df_to_adls, mindful_to_df + + +@flow( + name="Mindful extraction to ADLS", + description="Extract data from mindful and load it into Azure Data Lake Storage.", + retries=1, + retry_delay_seconds=60, + task_runner=ConcurrentTaskRunner, +) +def mindful_to_adls( + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + region: Literal["us1", "us2", "us3", "ca1", "eu1", "au1"] = "eu1", + endpoint: list[str] | str | None = None, + date_interval: list[date] | None = None, + limit: int = 1000, + adls_config_key: str | None = None, + adls_azure_key_vault_secret: str | None = None, + adls_path: str | None = None, + adls_path_overwrite: bool = False, +) -> None: + """Flow to download data from Mindful to Azure Data Lake. + + Args: + credentials (Optional[Dict[str, Any]], optional): Mindful credentials as a + dictionary. Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret where credentials are stored. Defaults to None. + region (Literal[us1, us2, us3, ca1, eu1, au1], optional): Survey Dynamix region + from where to interact with the mindful API. Defaults to "eu1" English + (United Kingdom). + endpoint (Optional[Union[List[str], str]], optional): Endpoint name or list of + them from where to download data. Defaults to None. + date_interval (Optional[List[date]], optional): Date time range detailing the + starting date and the ending date. If no range is passed, one day of data + since this moment will be retrieved. Defaults to None. + limit (int, optional): The number of matching interactions to return. + Defaults to 1000. + adls_credentials (Optional[Dict[str, Any]], optional): The credentials as a + dictionary. Defaults to None. + adls_config_key (Optional[str], optional): The key in the viadot config holding + relevant credentials. Defaults to None. 
+ adls_azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret containing a dictionary with ACCOUNT_NAME and Service Principal + credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. + Defaults to None. + adls_path (Optional[str], optional): Azure Data Lake destination file path. + Defaults to None. + adls_path_overwrite (bool, optional): Whether to overwrite the file in ADLS. + Defaults to True. + + Examples: + mindful_to_adls( + config_key=config_key, + endpoint=endpoint, + date_interval=date_interval, + adls_path=adls_path, + adls_config_key=adls_config_key, + adls_azure_key_vault_secret=adls_azure_key_vault_secret, + adls_path_overwrite=True, + ) + """ + if isinstance(endpoint, str): + endpoint = [endpoint] + + endpoints = endpoint + + for endpoint in endpoints: + data_frame = mindful_to_df( + config_key=config_key, + azure_key_vault_secret=azure_key_vault_secret, + region=region, + endpoint=endpoint, + date_interval=date_interval, + limit=limit, + ) + + # ??? + time.sleep(0.5) + + df_to_adls( + df=data_frame, + path=adls_path.rstrip("/") + "/" + f"{endpoint}.csv", + credentials_secret=adls_azure_key_vault_secret, + config_key=adls_config_key, + overwrite=adls_path_overwrite, + ) diff --git a/src/viadot/orchestration/prefect/flows/outlook_to_adls.py b/src/viadot/orchestration/prefect/flows/outlook_to_adls.py new file mode 100644 index 000000000..063db1c68 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/outlook_to_adls.py @@ -0,0 +1,97 @@ +"""Download data from Outlook API to Azure Data Lake Storage.""" + +from prefect import flow +from prefect.task_runners import ConcurrentTaskRunner + +from viadot.orchestration.prefect.tasks import df_to_adls, outlook_to_df + + +@flow( + name="Outlook extraction to ADLS", + description="Extract data from Outlook and load it into Azure Data Lake Storage.", + retries=1, + retry_delay_seconds=60, + task_runner=ConcurrentTaskRunner, +) +def outlook_to_adls( # noqa: PLR0913 + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + mailbox_name: str | None = None, + request_retries: int = 10, + start_date: str | None = None, + end_date: str | None = None, + limit: int = 10000, + address_limit: int = 8000, + outbox_list: list[str] | None = None, + adls_config_key: str | None = None, + adls_azure_key_vault_secret: str | None = None, + adls_path: str | None = None, + adls_path_overwrite: bool = False, +) -> None: + """Flow to download data from Outlook API to Azure Data Lake. + + Args: + credentials (Optional[Dict[str, Any]], optional): Outlook credentials as a + dictionary. Defaults to None. + config_key (Optional[str], optional): The key in the viadot config holding + relevant credentials. Defaults to None. + azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret where credentials are stored. Defaults to None. + mailbox_name (Optional[str], optional): Mailbox name. Defaults to None. + request_retries (int, optional): How many times to retry the connection to + Outlook. Defaults to 10. + start_date (Optional[str], optional): A filtering start date parameter e.g. + "2022-01-01". Defaults to None. + end_date (Optional[str], optional): A filtering end date parameter e.g. + "2022-01-02". Defaults to None. + limit (int, optional): Number of fetched top messages. Defaults to 10000. + address_limit (int, optional): The maximum number of accepted characters in the + sum of all email names. Defaults to 8000. 
+ outbox_list (List[str], optional): List of outbox folders to differentiate + between Inboxes and Outboxes. Defaults to ["Sent Items"]. + adls_credentials (Optional[Dict[str, Any]], optional): The credentials as a + dictionary. Defaults to None. + adls_config_key (Optional[str], optional): The key in the viadot config holding + relevant credentials. Defaults to None. + adls_azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret containing a dictionary with ACCOUNT_NAME and Service Principal + credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. + Defaults to None. + adls_path (Optional[str], optional): Azure Data Lake destination file path + (with file name). Defaults to None. + adls_path_overwrite (bool, optional): Whether to overwrite the file in ADLS. + Defaults to True. + + Examples: + outlook_to_adls( + config_key=config_key, + mailbox_name=mailbox_name, + start_date=start_date, + end_date=end_date, + adls_config_key=adls_config_key, + adls_path=adls_path, + adls_path_overwrite=True, + ) + """ + if outbox_list is None: + outbox_list = ["Sent Items"] + + data_frame = outlook_to_df( + config_key=config_key, + azure_key_vault_secret=azure_key_vault_secret, + mailbox_name=mailbox_name, + request_retries=request_retries, + start_date=start_date, + end_date=end_date, + limit=limit, + address_limit=address_limit, + outbox_list=outbox_list, + ) + + return df_to_adls( + df=data_frame, + path=adls_path, + credentials_secret=adls_azure_key_vault_secret, + config_key=adls_config_key, + overwrite=adls_path_overwrite, + ) diff --git a/src/viadot/orchestration/prefect/flows/sap_to_parquet.py b/src/viadot/orchestration/prefect/flows/sap_to_parquet.py new file mode 100644 index 000000000..5f24a3aba --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/sap_to_parquet.py @@ -0,0 +1,77 @@ +"""Flows for downloading data from SAP to Parquet file.""" + +from typing import Literal + +from prefect import flow + +from viadot.orchestration.prefect.tasks import sap_rfc_to_df +from viadot.orchestration.prefect.tasks.task_utils import df_to_parquet + + +@flow( + name="extract--sap--parquet", + description="Extract data from SAP and load it into Parquet file", + retries=1, + retry_delay_seconds=60, +) +def sap_to_parquet( # noqa: PLR0913 + path: str, + if_exists: Literal["append", "replace", "skip"] = "replace", + query: str | None = None, + func: str | None = None, + sap_sep: str | None = None, + rfc_total_col_width_character_limit: int = 400, + rfc_unique_id: list[str] | None = None, + sap_credentials_secret: str | None = None, + sap_config_key: str = "SAP", + alternative_version: bool = False, + replacement: str = "-", +) -> None: + """Download a pandas `DataFrame` from SAP load it into Parquet file. + + Args: + path (str): Path to Parquet file, where the data will be located. + Defaults to None. + if_exists (Literal["append", "replace", "skip"], optional): What to do if the + file exists. Defaults to "replace". + query (str): The query to be executed with pyRFC. + sap_sep (str, optional): The separator to use when reading query results. + If not provided, multiple options are automatically tested. + Defaults to None. + func (str, optional): SAP RFC function to use. Defaults to None. + rfc_total_col_width_character_limit (int, optional): Number of characters by + which query will be split in chunks in case of too many columns for RFC + function. According to SAP documentation, the limit is 512 characters. 
+ However, it was observed that SAP raising an exception even on a slightly + lower number of characters, so safety margin was added. Defaults to 400. + rfc_unique_id (list[str], optional): Reference columns to merge chunks Data + Frames. These columns must to be unique. Otherwise, the table will be + malformed. If no columns are provided, all data frame columns will be + concatenated. Defaults to None. + sap_credentials_secret (str, optional): The name of the Prefect Secret that + stores SAP credentials. Defaults to None. + sap_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to "SAP". + alternative_version (bool, optional): If true, enables the use of SAPRFC source + in version 2. Defaults to False. + replacement (str, optional): In case of sep is on a columns, set up a new + character to replace inside the string to avoid flow breakdowns. + Defaults to "-". + """ + df = sap_rfc_to_df( + query=query, + sep=sap_sep, + func=func, + replacement=replacement, + rfc_total_col_width_character_limit=rfc_total_col_width_character_limit, + rfc_unique_id=rfc_unique_id, + config_key=sap_config_key, + credentials_secret=sap_credentials_secret, + alternative_version=alternative_version, + ) + + return df_to_parquet( + df=df, + path=path, + if_exists=if_exists, + ) diff --git a/src/viadot/orchestration/prefect/flows/sap_to_redshift_spectrum.py b/src/viadot/orchestration/prefect/flows/sap_to_redshift_spectrum.py new file mode 100644 index 000000000..6c5443838 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/sap_to_redshift_spectrum.py @@ -0,0 +1,114 @@ +"""Flows for downloading data from SAP and uploading it to AWS Redshift Spectrum.""" + +from typing import Literal + +from prefect import flow + +from viadot.orchestration.prefect.tasks import df_to_redshift_spectrum, sap_rfc_to_df + + +@flow( + name="extract--sap--redshift_spectrum", + description="Extract data from SAP and load it into AWS Redshift Spectrum.", + retries=1, + retry_delay_seconds=60, +) +def sap_to_redshift_spectrum( # noqa: PLR0913 + to_path: str, + schema_name: str, + table: str, + extension: str = ".parquet", + if_exists: Literal["overwrite", "append"] = "overwrite", + partition_cols: list[str] | None = None, + index: bool = False, + compression: str | None = None, + aws_sep: str = ",", + description: str = "test", + aws_config_key: str | None = None, + query: str | None = None, + sap_sep: str | None = None, + func: str | None = None, + rfc_total_col_width_character_limit: int = 400, + rfc_unique_id: list[str] | None = None, + sap_credentials_secret: str | None = None, + sap_config_key: str | None = None, + alternative_version: bool = False, + replacement: str = "-", +) -> None: + """Download a pandas `DataFrame` from SAP and upload it to AWS Redshift Spectrum. + + Args: + to_path (str): Path to a S3 folder where the table will be located. + Defaults to None. + schema_name (str): AWS Glue catalog database name. + table (str): AWS Glue catalog table name. + partition_cols (list[str]): List of column names that will be used to create + partitions. Only takes effect if dataset=True. + extension (str): Required file type. Accepted file formats are 'csv' and + 'parquet'. + if_exists (str, optional): 'overwrite' to recreate any possible existing table + or 'append' to keep any possible existing table. Defaults to overwrite. + partition_cols (list[str], optional): List of column names that will be used to + create partitions. Only takes effect if dataset=True. Defaults to None. 
+ index (bool, optional): Write row names (index). Defaults to False. + compression (str, optional): Compression style (None, snappy, gzip, zstd). + aws_sep (str, optional): Field delimiter for the output file. Defaults to ','. + description (str, optional): AWS Glue catalog table description. + aws_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + query (str): The query to be executed with pyRFC. + sap_sep (str, optional): The separator to use when reading query results. + If not provided, multiple options are automatically tried. + Defaults to None. + func (str, optional): SAP RFC function to use. Defaults to None. + rfc_total_col_width_character_limit (int, optional): Number of characters by + which query will be split in chunks in case of too many columns for RFC + function. According to SAP documentation, the limit is 512 characters. + However, we observed SAP raising an exception even on a slightly lower + number of characters, so we add a safety margin. Defaults to 400. + rfc_unique_id (list[str], optional): Reference columns to merge chunks Data + Frames. These columns must to be unique. If no columns are provided, all + data frame columns will by concatenated. Defaults to None. + sap_credentials_secret (str, optional): The name of the AWS secret that stores + SAP credentials. Defaults to None. + sap_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + alternative_version (bool, optional): Enable the use version 2 in source. + Defaults to False. + replacement (str, optional): In case of sep is on a columns, set up a new + character to replace inside the string to avoid flow breakdowns. + Defaults to "-". + + Examples: + sap_to_redshift_spectrum( + ... + rfc_unique_id=["VBELN", "LPRIO"], + ... 
+ ) + """ + df = sap_rfc_to_df( + query=query, + sep=sap_sep, + func=func, + rfc_unique_id=rfc_unique_id, + rfc_total_col_width_character_limit=rfc_total_col_width_character_limit, + credentials_secret=sap_credentials_secret, + config_key=sap_config_key, + alternative_version=alternative_version, + replacement=replacement, + ) + + return df_to_redshift_spectrum( + df=df, + to_path=to_path, + schema_name=schema_name, + table=table, + extension=extension, + if_exists=if_exists, + partition_cols=partition_cols, + index=index, + compression=compression, + sep=aws_sep, + description=description, + config_key=aws_config_key, + ) diff --git a/src/viadot/orchestration/prefect/flows/sharepoint_to_adls.py b/src/viadot/orchestration/prefect/flows/sharepoint_to_adls.py new file mode 100644 index 000000000..83e28fd79 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/sharepoint_to_adls.py @@ -0,0 +1,61 @@ +"""Flows for pulling data from/into Sharepoint.""" + +from prefect import flow + +from viadot.orchestration.prefect.tasks import df_to_adls, sharepoint_to_df + + +@flow( + name="extract--sharepoint--adls", + description="Extract data from Exchange Rates API and load it into Azure Data Lake.", + retries=1, + retry_delay_seconds=60, +) +def sharepoint_to_adls( + sharepoint_url: str, + adls_path: str, + sharepoint_credentials_secret: str | None = None, + sharepoint_config_key: str | None = None, + adls_credentials_secret: str | None = None, + adls_config_key: str | None = None, + sheet_name: str | list[str | int] | int | None = None, + columns: str | list[str] | list[int] | None = None, + overwrite: bool = False, +) -> None: + """Download a file from Sharepoint and upload it to Azure Data Lake. + + Args: + sharepoint_url (str): The URL to the file. + adls_path (str): The destination path. + sharepoint_credentials_secret (str, optional): The name of the Azure Key Vault + secret storing the Sharepoint credentials. Defaults to None. + sharepoint_config_key (str, optional): The key in the viadot config holding + relevant credentials. Defaults to None. + adls_credentials_secret (str, optional): The name of the Azure Key Vault secret + storing the ADLS credentials. Defaults to None. + adls_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + sheet_name (str | list | int, optional): Strings are used + for sheet names. Integers are used in zero-indexed sheet positions + (chart sheets do not count as a sheet position). Lists of strings/integers + are used to request multiple sheets. Specify None to get all worksheets. + Defaults to None. + columns (str | list[str] | list[int], optional): Which columns to ingest. + Defaults to None. + overwrite (bool, optional): Whether to overwrite files in the lake. Defaults + to False. 
+ """ + df = sharepoint_to_df( + url=sharepoint_url, + credentials_secret=sharepoint_credentials_secret, + config_key=sharepoint_config_key, + sheet_name=sheet_name, + columns=columns, + ) + return df_to_adls( + df=df, + path=adls_path, + credentials_secret=adls_credentials_secret, + config_key=adls_config_key, + overwrite=overwrite, + ) diff --git a/src/viadot/orchestration/prefect/flows/sharepoint_to_databricks.py b/src/viadot/orchestration/prefect/flows/sharepoint_to_databricks.py new file mode 100644 index 000000000..dabb4ef78 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/sharepoint_to_databricks.py @@ -0,0 +1,74 @@ +"""Flows for pulling data from/into Sharepoint.""" + +import contextlib +from typing import Literal + +from prefect import flow + +from viadot.orchestration.prefect.tasks import df_to_databricks, sharepoint_to_df + + +@flow( + name="extract--sharepoint--databricks", + description="Extract data from Sharepoint and load it into Databricks.", + retries=1, + retry_delay_seconds=60, +) +def sharepoint_to_databricks( + sharepoint_url: str, + databricks_table: str, + databricks_schema: str | None = None, + if_exists: Literal["replace", "skip", "fail"] = "fail", + sheet_name: str | list | int | None = None, + columns: str | list[str] | list[int] | None = None, + sharepoint_credentials_secret: str | None = None, + sharepoint_config_key: str | None = None, + databricks_credentials_secret: str | None = None, + databricks_config_key: str | None = None, +) -> None: + """Download a file from Sharepoint and upload it to Azure Data Lake. + + Args: + sharepoint_url (str): The URL to the file. + databricks_table (str): The name of the target table. + databricks_schema (str, optional): The name of the target schema. + if_exists (str, Optional): What to do if the table already exists. + One of 'replace', 'skip', and 'fail'. + columns (str | list[str] | list[int], optional): Which columns to ingest. + Defaults to None. + sheet_name (str | list | int, optional): Strings are used for sheet names. + Integers are used in zero-indexed sheet positions + (chart sheets do not count as a sheet position). Lists of strings/integers + are used to request multiple sheets. Specify None to get all worksheets. + Defaults to None. + sharepoint_credentials_secret (str, optional): The name of the Azure Key Vault + secret storing relevant credentials. Defaults to None. + sharepoint_config_key (str, optional): The key in the viadot config holding + relevant credentials. Defaults to None. + databricks_credentials_secret (str, optional): The name of the Azure Key Vault + secret storing relevant credentials. Defaults to None. + databricks_config_key (str, optional): The key in the viadot config holding + relevant credentials. Defaults to None. + """ + # Workaround Prefect converting this parameter to string due to multiple + # supported input types -- pandas relies on the data type to choose relevant + # implementation. 
+ if sheet_name is not None: + with contextlib.suppress(ValueError): + sheet_name = int(sheet_name) + + df = sharepoint_to_df( + url=sharepoint_url, + credentials_secret=sharepoint_credentials_secret, + config_key=sharepoint_config_key, + sheet_name=sheet_name, + columns=columns, + ) + return df_to_databricks( + df=df, + schema=databricks_schema, + table=databricks_table, + if_exists=if_exists, + credentials_secret=databricks_credentials_secret, + config_key=databricks_config_key, + ) diff --git a/src/viadot/orchestration/prefect/flows/sharepoint_to_redshift_spectrum.py b/src/viadot/orchestration/prefect/flows/sharepoint_to_redshift_spectrum.py new file mode 100644 index 000000000..16a92fd73 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/sharepoint_to_redshift_spectrum.py @@ -0,0 +1,116 @@ +"""Flows for downloading data from Sharepoint and uploading it to AWS Redshift Spectrum.""" # noqa: W505 + +from typing import Literal + +from prefect import flow + +from viadot.orchestration.prefect.tasks import ( + df_to_redshift_spectrum, + sharepoint_to_df, +) + + +@flow( + name="extract--sharepoint--redshift_spectrum", + description="Extract data from Sharepoint and load it into AWS Redshift Spectrum.", + retries=1, + retry_delay_seconds=60, +) +def sharepoint_to_redshift_spectrum( # noqa: PLR0913 + sharepoint_url: str, + to_path: str, + schema_name: str, + table: str, + extension: str = ".parquet", + if_exists: Literal["overwrite", "append"] = "overwrite", + partition_cols: list[str] | None = None, + index: bool = False, + compression: str | None = None, + sep: str = ",", + description: str | None = None, + aws_config_key: str | None = None, + sheet_name: str | list[str | int] | int | None = None, + columns: str | list[str] | list[int] | None = None, + na_values: list[str] | None = None, + sharepoint_credentials_secret: str | None = None, + sharepoint_config_key: str | None = None, + file_sheet_mapping: dict | None = None, +) -> None: + """Extract data from SharePoint and load it into AWS Redshift Spectrum. + + This function downloads data either from SharePoint file or the whole directory and + uploads it to AWS Redshift Spectrum. + + Modes: + If the `URL` ends with the file (e.g ../file.xlsx) it downloads only the file and + creates a table from it. + If the `URL` ends with the folder (e.g ../folder_name/): it downloads multiple files + and creates a table from them: + - If `file_sheet_mapping` is provided, it downloads and processes only + the specified files and sheets. + - If `file_sheet_mapping` is NOT provided, it downloads and processes all of + the files from the chosen folder. + + + Args: + sharepoint_url (str): The URL to the file. + to_path (str): Path to a S3 folder where the table will be located. Defaults to + None. + schema_name (str): AWS Glue catalog database name. + table (str): AWS Glue catalog table name. + partition_cols (list[str]): List of column names that will be used to create + partitions. Only takes effect if dataset=True. + extension (str): Required file type. Accepted file formats are 'csv' and + 'parquet'. + if_exists (str, optional): 'overwrite' to recreate any possible existing table + or 'append' to keep any possible existing table. Defaults to overwrite. + partition_cols (list[str], optional): List of column names that will be used to + create partitions. Only takes effect if dataset=True. Defaults to None. + index (bool, optional): Write row names (index). Defaults to False. 
+ compression (str, optional): Compression style (None, snappy, gzip, zstd). + sep (str, optional): Field delimiter for the output file. Defaults to ','. + description (str, optional): AWS Glue catalog table description. Defaults to + None. + aws_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + sheet_name (str | list | int, optional): Strings are used for sheet names. + Integers are used in zero-indexed sheet positions (chart sheets do not count + as a sheet position). Lists of strings/integers are used to request multiple + sheets. Specify None to get all worksheets. Defaults to None. + columns (str | list[str] | list[int], optional): Which columns to ingest. + Defaults to None. + na_values (list[str] | None): Additional strings to recognize as NA/NaN. + If list passed, the specific NA values for each column will be recognized. + Defaults to None. + sharepoint_credentials_secret (str, optional): The name of the secret storing + Sharepoint credentials. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + sharepoint_config_key (str, optional): The key in the viadot config holding + relevant credentials. Defaults to None. + file_sheet_mapping (dict): A dictionary where keys are filenames and values are + the sheet names to be loaded from each file. If provided, only these files + and sheets will be downloaded. Defaults to None. + """ + df = sharepoint_to_df( + url=sharepoint_url, + sheet_name=sheet_name, + columns=columns, + na_values=na_values, + file_sheet_mapping=file_sheet_mapping, + credentials_secret=sharepoint_credentials_secret, + config_key=sharepoint_config_key, + ) + df_to_redshift_spectrum( + df=df, + to_path=to_path, + schema_name=schema_name, + table=table, + extension=extension, + if_exists=if_exists, + partition_cols=partition_cols, + index=index, + compression=compression, + sep=sep, + description=description, + config_key=aws_config_key, + ) diff --git a/src/viadot/orchestration/prefect/flows/sharepoint_to_s3.py b/src/viadot/orchestration/prefect/flows/sharepoint_to_s3.py new file mode 100644 index 000000000..568c56438 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/sharepoint_to_s3.py @@ -0,0 +1,47 @@ +"""Flows for downloading data from Sharepoint and uploading it to Amazon S3.""" + +from prefect import flow + +from viadot.orchestration.prefect.tasks import s3_upload_file, sharepoint_download_file + + +@flow( + name="extract--sharepoint--s3", + description="Flows for downloading data from Sharepoint and uploading it to Amazon S3.", + retries=1, + retry_delay_seconds=60, +) +def sharepoint_to_s3( + url: str, + local_path: str, + to_path: str, + sharepoint_credentials_secret: str | None = None, + sharepoint_config_key: str | None = None, + aws_config_key: str | None = None, +) -> None: + """Download a file from Sharepoint and upload it to S3. + + Args: + url (str): The URL of the file to be downloaded. + local_path (str): Local file directory. Defaults to None. + to_path (str): Where to download the file. + sharepoint_credentials_secret (str, optional): The name of the secret that + stores Sharepoint credentials. Defaults to None. More info on: + https://docs.prefect.io/concepts/blocks/ + sharepoint_config_key (str, optional): The key in the viadot config holding + relevant credentials. + aws_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. 
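A minimal, illustrative run of `sharepoint_to_s3`; the URL, local staging path, S3 key, and config keys are assumptions.

```python
from viadot.orchestration.prefect.flows.sharepoint_to_s3 import sharepoint_to_s3

# Download a SharePoint file to local disk, then upload it to S3.
sharepoint_to_s3(
    url="https://example.sharepoint.com/sites/team/Shared%20Documents/report.xlsx",
    local_path="/tmp/report.xlsx",
    to_path="s3://my-bucket/raw/report.xlsx",
    sharepoint_config_key="sharepoint",  # assumed viadot config keys
    aws_config_key="s3",
)
```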
+ """ + sharepoint_download_file( + url=url, + to_path=local_path, + credentials_secret=sharepoint_credentials_secret, + config_key=sharepoint_config_key, + ) + + s3_upload_file( + from_path=local_path, + to_path=to_path, + config_key=aws_config_key, + ) diff --git a/src/viadot/orchestration/prefect/flows/sql_server_to_minio.py b/src/viadot/orchestration/prefect/flows/sql_server_to_minio.py new file mode 100644 index 000000000..7692433e6 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/sql_server_to_minio.py @@ -0,0 +1,61 @@ +"""Flows for downloading data from SQLServer and uploading it to MinIO.""" + +from typing import Literal + +from prefect import flow + +from viadot.orchestration.prefect.tasks import df_to_minio, sql_server_to_df + + +@flow( + name="extract--sql_server--minio", + description="Extract data from SQLServer and load it into MinIO.", + retries=1, + retry_delay_seconds=60, +) +def sql_server_to_minio( + query: str, + path: str, + if_exists: Literal["error", "delete_matching", "overwrite_or_ignore"] = "error", + basename_template: str | None = None, + sql_server_credentials_secret: str | None = None, + sql_server_config_key: str | None = None, + minio_credentials_secret: str | None = None, + minio_config_key: str | None = None, +) -> None: + """Download a file from SQLServer and upload it to MinIO. + + Args: + query (str, required): The query to execute on the SQL Server database. + If the query doesn't start with "SELECT" returns an empty DataFrame. + path (str): Path to the MinIO file/folder. + basename_template (str, optional): A template string used to generate + base names of written data files. The token '{i}' will be replaced with + an automatically incremented integer. Defaults to None. + if_exists (Literal["error", "delete_matching", "overwrite_or_ignore"], + optional). What to do if the dataset already exists. Defaults to "error". + sql_server_credentials_secret (str, optional): The name of the secret storing + the credentials to the SQLServer. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + sql_server_config_key (str, optional): The key in the viadot config holding + relevant credentials to the SQLServer. Defaults to None. + minio_credentials_secret (str, optional): The name of the secret storing + the credentials to the MinIO. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + minio_config_key (str, optional): The key in the viadot config holding relevant + credentials to the MinIO. Defaults to None. + """ + df = sql_server_to_df( + query=query, + config_key=sql_server_config_key, + credentials_secret=sql_server_credentials_secret, + ) + + return df_to_minio( + df=df, + path=path, + if_exists=if_exists, + basename_template=basename_template, + config_key=minio_config_key, + credentials_secret=minio_credentials_secret, + ) diff --git a/src/viadot/orchestration/prefect/flows/transform.py b/src/viadot/orchestration/prefect/flows/transform.py new file mode 100644 index 000000000..b4e6f3b1a --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/transform.py @@ -0,0 +1,140 @@ +"""Build specified dbt model(s).""" + +import os +import shutil + +from prefect import flow, task + +from viadot.orchestration.prefect.tasks import clone_repo, dbt_task +from viadot.orchestration.prefect.utils import get_credentials + + +@task +def _cleanup_repo(dbt_repo_dir_name: str) -> None: + """Remove a repo folder. + + Args: + dbt_repo_dir_name (str): The name of the temporary folder. 
+ """ + shutil.rmtree(dbt_repo_dir_name, ignore_errors=True) # Delete folder on run + + +@flow( + name="Transform", + description="Build specified dbt model(s).", + timeout_seconds=2 * 60 * 60, +) +def transform( + dbt_project_path: str, + dbt_repo_url: str | None = None, + dbt_repo_url_secret: str | None = None, + dbt_repo_branch: str | None = None, + token_secret: str | None = None, + local_dbt_repo_path: str | None = None, + dbt_selects: dict[str, str] | None = None, + dbt_target: str | None = None, +) -> None: + """Build specified dbt model(s). + + This flow implements a simplified version of the `transform_and_catalog()` flow, + excluding metadata handling, source freshness checks, and stateful operations. + + Args: + dbt_project_path (str): The path to the dbt project (the directory containing + the `dbt_project.yml` file). + dbt_repo_url (str, optional): The URL for cloning the dbt repo with relevant + dbt project. + dbt_repo_url_secret (str, optional): Alternatively to above, the secret + containing `dbt_repo_url`. + dbt_repo_branch (str, optional): The branch of the dbt repo to use. + token_secret (str, optional): The name of the secret storing the git token. + Defaults to None. + local_dbt_repo_path (str, optional): The path where to clone the repo to. + dbt_selects (dict, optional): Valid + [dbt node selection](https://docs.getdbt.com/reference/node-selection/syntax) + expressions. Valid keys are `run`, `test`, and `source_freshness`. The test + select expression is taken from run's, as long as run select is + provided. Defaults to None. + dbt_target (str): The dbt target to use. If not specified, the default dbt + target (as specified in `profiles.yaml`) will be used. Defaults to None. + + Returns: + list[str]: Lines from stdout of the `upload_metadata` task as a list. 
+ + Examples: + # Build a single model + ```python + import os + from prefect_viadot.flows import transform_and_catalog + + my_dbt_project_path = os.path.expanduser("~/dbt/my_dbt_project") + + transform_and_catalog( + dbt_project_path=my_dbt_project_path, + dbt_selects={"run": "my_model"} + ) + ``` + + Some common `dbt_select` patterns: + - build a model and all its downstream dependencies: `dbt_select="my_model+"` + - build all models in a directory: `dbt_select="models/my_project"` + ``` + """ + dbt_repo_url = dbt_repo_url or get_credentials(dbt_repo_url_secret) + local_dbt_repo_path = ( + os.path.expandvars(local_dbt_repo_path) + if local_dbt_repo_path is not None + else "tmp_dbt_repo_dir" + ) + + clone = clone_repo( + url=dbt_repo_url, + checkout_branch=dbt_repo_branch, + token_secret=token_secret, + path=local_dbt_repo_path, + ) + + # dbt CLI does not handle passing --target=None + dbt_target_option = f"-t {dbt_target}" if dbt_target is not None else "" + + # Clean up artifacts from previous runs (`target/` dir and packages) + dbt_clean_task = dbt_task.with_options(name="dbt_task_clean") + dbt_clean_up = dbt_clean_task( + project_path=dbt_project_path, command="clean", wait_for=[clone] + ) + dbt_pull_deps_task = dbt_task.with_options(name="dbt_task_deps") + pull_dbt_deps = dbt_pull_deps_task( + project_path=dbt_project_path, + command="deps", + wait_for=[dbt_clean_up], + ) + + run_select = dbt_selects.get("run") + run_select_safe = f"-s {run_select}" if run_select is not None else "" + run = dbt_task( + project_path=dbt_project_path, + command=f"run {run_select_safe} {dbt_target_option}", + wait_for=[pull_dbt_deps], + ) + + # Generate docs + # Produces `catalog.json`, `run-results.json`, and `manifest.json` + dbt_docs_generate_task = dbt_task.with_options(name="dbt_task_docs_generate") + generate_catalog_json = dbt_docs_generate_task( + project_path=dbt_project_path, + command=f"docs generate {dbt_target_option} --no-compile", + wait_for=[run], + ) + + test_select = dbt_selects.get("test", run_select) + test_select_safe = f"-s {test_select}" if test_select is not None else "" + test = dbt_task( + project_path=dbt_project_path, + command=f"test {test_select_safe} {dbt_target_option}", + wait_for=[generate_catalog_json], + ) + + _cleanup_repo( + local_dbt_repo_path, + wait_for=[test], + ) diff --git a/src/viadot/orchestration/prefect/flows/transform_and_catalog.py b/src/viadot/orchestration/prefect/flows/transform_and_catalog.py new file mode 100644 index 000000000..cd3c5b4b1 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/transform_and_catalog.py @@ -0,0 +1,183 @@ +"""Build specified dbt model(s) and upload the generated metadata to Luma.""" + +from pathlib import Path +import shutil +from typing import Literal + +from prefect import allow_failure, flow, task + +from viadot.orchestration.prefect.tasks import clone_repo, dbt_task, luma_ingest_task +from viadot.orchestration.prefect.utils import get_credentials + + +@task +def remove_dbt_repo_dir(dbt_repo_dir_name: str) -> None: + """Remove the repo directory. + + Args: + dbt_repo_dir_name (str): The name of the dbt repo directory. 
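A sketch of calling the `transform` flow directly; the repo URL, branch, select expression, and target below are assumptions. Note that `dbt_project_path` is passed to the dbt tasks as-is, so it should point inside the cloned repository (the default clone location is `tmp_dbt_repo_dir`).

```python
from viadot.orchestration.prefect.flows.transform import transform

# Clone a dbt repo, build one model and its downstream dependencies,
# run its tests, and clean up the clone afterwards.
transform(
    dbt_project_path="tmp_dbt_repo_dir/my_dbt_project",        # path inside the clone
    dbt_repo_url="https://github.com/my-org/dbt-models.git",   # placeholder repo
    dbt_repo_branch="main",
    dbt_selects={"run": "my_model+"},
    dbt_target="dev",
)
```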
+ """ + shutil.rmtree(dbt_repo_dir_name, ignore_errors=True) + + +@flow( + name="Transform and Catalog", + description="Build specified dbt model(s) and upload generated metadata to Luma.", + timeout_seconds=2 * 60 * 60, +) +def transform_and_catalog( # noqa: PLR0913 + dbt_repo_url: str | None = None, + dbt_repo_url_secret: str | None = None, + dbt_project_path: str = "dbt", + dbt_repo_branch: str | None = None, + dbt_repo_token_secret: str | None = None, + dbt_selects: dict[str, str] | None = None, + dbt_target: str | None = None, + dbt_target_dir_path: str | None = None, + luma_url: str = "http://localhost:8000", + luma_follow: bool = False, + metadata_kind: Literal["model", "model_run"] = "model_run", +) -> list[str]: + """Build specified dbt model(s) and upload the generated metadata to Luma. + + Supports ingesting both model and model run metadata (controlled by the + `metadata_kind` parameter). + + Note that metadata is still ingested even if the preceding `dbt test` task fails. + This is done in order to capture test failure metadata in the data catalog. + + Args: + dbt_repo_url (str, optional): The URL for cloning the dbt repo with relevant + dbt project. Defaults to None. + dbt_repo_url_secret (str, optional): Alternatively to above, the secret + containing `dbt_repo_url`. Defaults to None. + dbt_project_path (str): Path to the dbt project directory, relative to the + dbt repository's root. For example, "dbt/my_dbt_project". Defaults to "dbt". + dbt_repo_branch (str, optional): The branch of the dbt repo to use. Defaults to + None (default repo branch). + dbt_repo_token_secret (str, optional): The secret containing the personal access + token used to clone the dbt repository, in case it's private. Not required + if token is already included in `dbt_repo_url` (which is NOT recommended). + Defaults to None. + dbt_selects (dict, optional): Valid + [dbt node selection](https://docs.getdbt.com/reference/node-selection/syntax) + expressions. Valid keys are `run`, `test`, and `source_freshness`. The test + select expression is taken from run's, as long as run select is + provided. Defaults to None. + dbt_target (str): The dbt target to use. If not specified, the default dbt + target (as specified in `profiles.yaml`) will be used. Defaults to None. + dbt_target_dir_path (str): The path to your dbt project's target + directory, which contains dbt artifact JSON files, relative + to dbt project's root directory. By default, + `//target`, since "target" is the default + name of the directory generated by dbt. + luma_url (str, optional): The URL of the Luma instance to ingest into. + Defaults to "http://localhost:8000". + luma_follow (bool, optional): Whether to follow the ingestion process until it's + completed (by default, ingestion request is sent without awaiting for the + response). By default, `False`. + metadata_kind (Literal["model", "model_run"], optional): The kind of metadata + to ingest. Defaults to "model_run". + + Returns: + list[str]: Lines from stdout of the `upload_metadata` task as a list. + + Examples: + # Build staging models. 
+ + ```python + import os + from prefect_viadot.flows import transform_and_catalog + + my_dbt_repo_url = "https://github.com/dbt-labs/jaffle_shop" + my_luma_url = "http://localhost:8000" + + transform_and_catalog( + dbt_repo_url=my_dbt_repo_url + dbt_selects={"run": "staging"} + luma_url=my_luma_url, + ) + ``` + + Some common `dbt_select` patterns: + - build a model and all its downstream dependencies: `dbt_select="my_model+"` + - build all models in a directory: `dbt_select="models/staging"` + """ + # Clone the dbt project. + dbt_repo_url = dbt_repo_url or get_credentials(dbt_repo_url_secret) + clone = clone_repo( + url=dbt_repo_url, + checkout_branch=dbt_repo_branch, + token_secret=dbt_repo_token_secret, + ) + + # Prepare the environment. + dbt_repo_name = dbt_repo_url.split("/")[-1].replace(".git", "") + dbt_project_path_full = Path(dbt_repo_name) / dbt_project_path + dbt_pull_deps_task = dbt_task.with_options(name="dbt_deps") + pull_dbt_deps = dbt_pull_deps_task( + project_path=dbt_project_path_full, + command="deps", + wait_for=[clone], + ) + + # Run dbt commands. + dbt_target_option = f"-t {dbt_target}" if dbt_target is not None else "" + + if metadata_kind == "model_run": + # Produce `run-results.json` artifact for Luma ingestion. + if dbt_selects: + run_select = dbt_selects.get("run") + test_select = dbt_selects.get("test", run_select) + + run_select_safe = f"-s {run_select}" if run_select is not None else "" + test_select_safe = f"-s {test_select}" if test_select is not None else "" + else: + run_select_safe = "" + test_select_safe = "" + + run_task = dbt_task.with_options(name="dbt_run") + run = run_task( + project_path=dbt_project_path_full, + command=f"run {run_select_safe} {dbt_target_option}", + wait_for=[pull_dbt_deps], + ) + + test_task = dbt_task.with_options(name="dbt_test") + test = test_task( + project_path=dbt_project_path_full, + command=f"test {test_select_safe} {dbt_target_option}", + raise_on_failure=False, + wait_for=[run], + ) + upload_metadata_upstream_task = test + + else: + # Produce `catalog.json` and `manifest.json` artifacts for Luma ingestion. + docs_generate_task = dbt_task.with_options(name="dbt_docs_generate") + docs = docs_generate_task( + project_path=dbt_project_path_full, + command="docs generate", + wait_for=[pull_dbt_deps], + ) + upload_metadata_upstream_task = docs + + # Upload metadata to Luma. + if dbt_target_dir_path is None: + dbt_target_dir_path = dbt_project_path_full / "target" + + upload_metadata = luma_ingest_task( + metadata_kind=metadata_kind, + metadata_dir_path=dbt_target_dir_path, + luma_url=luma_url, + follow=luma_follow, + wait_for=[upload_metadata_upstream_task], + ) + + # Cleanup. 
+ remove_dbt_repo_dir( + dbt_repo_name, + wait_for=[allow_failure(upload_metadata)], + ) + + return upload_metadata diff --git a/src/viadot/orchestration/prefect/tasks/__init__.py b/src/viadot/orchestration/prefect/tasks/__init__.py new file mode 100644 index 000000000..cbe8f7276 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/__init__.py @@ -0,0 +1,51 @@ +"""Imports.""" + +from .adls import adls_upload, df_to_adls +from .bcp import bcp +from .cloud_for_customers import cloud_for_customers_to_df +from .databricks import df_to_databricks +from .dbt import dbt_task +from .duckdb import duckdb_query +from .exchange_rates import exchange_rates_to_df +from .genesys import genesys_to_df +from .git import clone_repo +from .hubspot import hubspot_to_df +from .luma import luma_ingest_task +from .mindful import mindful_to_df +from .minio import df_to_minio +from .outlook import outlook_to_df +from .redshift_spectrum import df_to_redshift_spectrum +from .s3 import s3_upload_file +from .sap_rfc import sap_rfc_to_df +from .sharepoint import ( + sharepoint_download_file, + sharepoint_to_df, +) +from .sql_server import create_sql_server_table, sql_server_query, sql_server_to_df + + +__all__ = [ + "adls_upload", + "df_to_adls", + "bcp", + "cloud_for_customers_to_df", + "df_to_databricks", + "dbt_task", + "duckdb_query", + "exchange_rates_to_df", + "genesys_to_df", + "clone_repo", + "hubspot_to_df", + "luma_ingest_task", + "mindful_to_df", + "df_to_minio", + "outlook_to_df", + "df_to_redshift_spectrum", + "s3_upload_file", + "sap_rfc_to_df", + "sharepoint_download_file", + "sharepoint_to_df", + "create_sql_server_table", + "sql_server_query", + "sql_server_to_df", +] diff --git a/src/viadot/orchestration/prefect/tasks/adls.py b/src/viadot/orchestration/prefect/tasks/adls.py new file mode 100644 index 000000000..8b780c7bf --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/adls.py @@ -0,0 +1,92 @@ +"""Tasks for interacting with Azure Data Lake (gen2).""" + +import contextlib + +import pandas as pd +from prefect import task + +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials + + +with contextlib.suppress(ImportError): + from viadot.sources import AzureDataLake + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60) +def adls_upload( + to_path: str, + from_path: str | None = None, + recursive: bool = False, + overwrite: bool = False, + credentials_secret: str | None = None, + config_key: str | None = None, +) -> None: + """Upload file(s) to Azure Data Lake. + + Credentials can be specified as either a key inside viadot config file, + or the name of the Prefect `AzureKeyVaultSecretReference` block document + storing the reference to an Azure Key Vault secret. + + Args: + to_path (str, optional): The destination path. + recursive (bool, optional): Set this to true if uploading entire directories. + Defaults to False. + from_path (str, optional): The local path from which to upload the file(s). + Defaults to None. + overwrite (bool, optional): Whether to overwrite files in the lake. Defaults + to False. + credentials_secret (str, optional): The name of the Azure Key Vault secret + storing the credentials. + config_key (str, optional): The key in the viadot config holding relevant + credentials. 
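Since this `__init__.py` re-exports every task listed in `__all__`, downstream flows can import tasks from the package root rather than from the individual modules. A minimal, illustrative sketch; the SharePoint URL, lake path, and secret names below are placeholders, not values from this patch:

```python
from prefect import flow

# Any name listed in `__all__` above can be imported from the package root.
from viadot.orchestration.prefect.tasks import df_to_adls, sharepoint_to_df


@flow
def sharepoint_to_adls_example():
    # Placeholder URL and secret names; replace with real ones.
    df = sharepoint_to_df(
        url="https://example.sharepoint.com/sites/finance/file.xlsx",
        credentials_secret="sharepoint-credentials",
    )
    df_to_adls(
        df=df,
        path="raw/sharepoint/file.csv",
        credentials_secret="adls-credentials",
        overwrite=True,
    )
```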
+ """ + if not (credentials_secret or config_key): + raise MissingSourceCredentialsError + + credentials = get_credentials(credentials_secret) + lake = AzureDataLake(credentials=credentials, config_key=config_key) + + lake.upload( + from_path=from_path, + to_path=to_path, + recursive=recursive, + overwrite=overwrite, + ) + + +@task(retries=3, retry_delay_seconds=10) +def df_to_adls( + df: pd.DataFrame, + path: str, + sep: str = "\t", + credentials_secret: str | None = None, + config_key: str | None = None, + overwrite: bool = False, +) -> None: + r"""Upload a pandas `DataFrame` to a file on Azure Data Lake. + + Args: + df (pd.DataFrame): The pandas DataFrame to upload. + path (str): The destination path. Defaults to None. + sep (str, optional): The separator to use in the `to_csv` function. Defaults to + "\t". + overwrite (bool, optional): Whether to overwrite files in the lake. Defaults + to False. + credentials_secret (str, optional): The name of the Azure Key Vault secret + storing the credentials. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + """ + if not (credentials_secret or config_key): + raise MissingSourceCredentialsError + + credentials = get_credentials(credentials_secret) + lake = AzureDataLake(credentials=credentials, config_key=config_key) + + lake.from_df( + df=df, + path=path, + sep=sep, + overwrite=overwrite, + ) diff --git a/src/viadot/orchestration/prefect/tasks/bcp.py b/src/viadot/orchestration/prefect/tasks/bcp.py new file mode 100644 index 000000000..73b0444a5 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/bcp.py @@ -0,0 +1,71 @@ +"""Tasks for bulk copying data from a CSV file into an SQLServer table using BCP.""" + +from typing import Literal + +from prefect import task + +from viadot.config import get_source_credentials +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials, shell_run_command + + +@task +def bcp( + path: str | None = None, + schema: str | None = None, + table: str | None = None, + chunksize: int = 5000, + error_log_file_path: str = "./log_file.log", + on_error: Literal["skip", "fail"] = "skip", + credentials_secret: str | None = None, + config_key: str | None = None, +) -> None: + """Upload data from a CSV file into an SQLServer table using BCP. + + For more information on bcp (bulk copy program), see + https://learn.microsoft.com/en-us/sql/tools/bcp-utility. + + Args: + path (str): Where to store the CSV data dump used for bulk upload to a database. + schema (str, optional): Destination schema. Defaults to None. + table (str, optional): Destination table. Defaults to None. + chunksize (int, optional): Size of a chunk to use in the bcp function. + Defaults to 5000. + error_log_file_path (string, optional): Full path of an error file. Defaults + to "./log_file.log". + on_error (str, optional): What to do in case of a bcp error. Defaults to "skip". + credentials_secret (str, optional): The name of the secret storing + the credentials to the SQLServer. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + config_key (str, optional): The key in the viadot config holding relevant + credentials to the SQLServer. Defaults to None. 
+ """ + if not (credentials_secret or config_key): + raise MissingSourceCredentialsError + + credentials = get_source_credentials(config_key) or get_credentials( + credentials_secret + ) + + fqn = f"{schema}.{table}" if schema else table + + server = credentials["server"] + db_name = credentials["db_name"] + uid = credentials["user"] + pwd = credentials["password"] + + if "," in server: + # A space after the comma is allowed in the ODBC connection string + # but not in BCP's 'server' argument. + server = server.replace(" ", "") + + if on_error == "skip": + max_error = 0 + elif on_error == "fail": + max_error = 1 + else: + msg = "Please provide correct 'on_error' parameter value - 'skip' or 'fail'." + raise ValueError(msg) + + command = f"/opt/mssql-tools/bin/bcp {fqn} in '{path}' -S {server} -d {db_name} -U {uid} -P '{pwd}' -c -F 2 -b {chunksize} -h 'TABLOCK' -e '{error_log_file_path}' -m {max_error}" + shell_run_command(command=command) diff --git a/src/viadot/orchestration/prefect/tasks/cloud_for_customers.py b/src/viadot/orchestration/prefect/tasks/cloud_for_customers.py new file mode 100644 index 000000000..b5229033c --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/cloud_for_customers.py @@ -0,0 +1,53 @@ +"""Tasks for interacting with Cloud for Customers.""" + +from typing import Any + +import pandas as pd +from prefect import task + +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import CloudForCustomers + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60) +def cloud_for_customers_to_df( + url: str | None = None, + endpoint: str | None = None, + report_url: str | None = None, + filter_params: dict[str, Any] | None = None, + credentials_secret: str | None = None, + config_key: str | None = None, + **kwargs: dict[str, Any] | None, +) -> pd.DataFrame: + """Extracts Cloud for Customers records as pd.DataFrame. + + Args: + url (str, optional): The API url. + endpoint (str, optional): The API endpoint. + report_url (str, optional): The API url in case of prepared report. + filter_params (dict[str, Any], optional): Query parameters. + credentials_secret (str, optional): The name of the secret storing the + credentials. + More info on: https://docs.prefect.io/concepts/blocks/ + config_key (str, optional): The key in the viadot config holding relevant + credentials. + credentials (dict, optional): Cloud for Customers credentials. + kwargs: The parameters to pass to DataFrame constructor. + + Returns: + pd.Dataframe: The pandas `DataFrame` containing data from the file. + """ + if not (credentials_secret or config_key): + msg = "Either `credentials_secret` or `config_key` has to be specified and not empty." 
+ raise ValueError(msg) + + credentials = get_credentials(credentials_secret) + c4c = CloudForCustomers( + url=url, + endpoint=endpoint, + report_url=report_url, + filter_params=filter_params, + credentials=credentials, + config_key=config_key, + ) + return c4c.to_df(**kwargs) diff --git a/src/viadot/orchestration/prefect/tasks/databricks.py b/src/viadot/orchestration/prefect/tasks/databricks.py new file mode 100644 index 000000000..1a9af697c --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/databricks.py @@ -0,0 +1,77 @@ +"""Tasks for interacting with Databricks.""" + +import contextlib +from typing import Literal + +import pandas as pd +from prefect import task + + +with contextlib.suppress(ImportError): + from viadot.sources import Databricks + +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60) +def df_to_databricks( + df: pd.DataFrame, + table: str, + schema: str | None = None, + if_exists: Literal["replace", "skip", "fail"] = "fail", + if_empty: Literal["warn", "skip", "fail"] = "warn", + credentials_secret: str | None = None, + config_key: str | None = None, +) -> None: + """Insert a pandas `DataFrame` into a Delta table. + + Args: + df (pd.DataFrame): A pandas `DataFrame` with the data + to be inserted into the table. + table (str): The name of the target table. + schema (str, optional): The name of the target schema. + if_exists (str, Optional): What to do if the table already exists. + One of 'replace', 'skip', and 'fail'. + if_empty (str, optional): What to do if the input `DataFrame` is empty. + Defaults to 'warn'. + credentials_secret (str, optional): The name of the secret storing + the credentials. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. 
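For context, a minimal sketch of the `cloud_for_customers_to_df` task defined above chained with `df_to_adls`; the tenant URL, endpoint, lake path, and secret names are illustrative assumptions:

```python
from prefect import flow

from viadot.orchestration.prefect.tasks import cloud_for_customers_to_df, df_to_adls


@flow
def c4c_to_adls_example():
    # Tenant URL, endpoint, and secret names are placeholders.
    df = cloud_for_customers_to_df(
        url="https://my-tenant.crm.ondemand.com/sap/c4c/odata/v1/c4codataapi/",
        endpoint="ServiceRequestCollection",
        credentials_secret="c4c-credentials",
    )
    df_to_adls(
        df=df,
        path="raw/c4c/service_requests.csv",
        credentials_secret="adls-credentials",
        overwrite=True,
    )
```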
+
+    Example:
+        ```python
+        from prefect_viadot.tasks import df_to_databricks
+        from prefect import flow
+        import pandas as pd
+
+        @flow
+        def insert_df_into_databricks():
+            data = [{"id": "1", "name": "Joe"}]
+            df = pd.DataFrame(data)
+            insert = df_to_databricks(
+                df=df,
+                schema="prefect_viadot_test",
+                table="test",
+                if_exists="replace"
+            )
+            return insert
+
+        insert_df_into_databricks()
+        ```
+    """
+    if not (credentials_secret or config_key):
+        raise MissingSourceCredentialsError
+
+    credentials = get_credentials(credentials_secret)
+    databricks = Databricks(
+        credentials=credentials,
+        config_key=config_key,
+    )
+    if schema and not databricks._check_if_schema_exists(schema):
+        databricks.create_schema(schema)
+    databricks.create_table_from_pandas(
+        df=df, schema=schema, table=table, if_exists=if_exists, if_empty=if_empty
+    )
diff --git a/src/viadot/orchestration/prefect/tasks/dbt.py b/src/viadot/orchestration/prefect/tasks/dbt.py
new file mode 100644
index 000000000..01db4152c
--- /dev/null
+++ b/src/viadot/orchestration/prefect/tasks/dbt.py
@@ -0,0 +1,71 @@
+"""Tasks for interacting with [dbt](https://www.getdbt.com/)."""
+
+import logging
+import os
+from typing import Any
+
+from prefect import task
+from prefect.logging import get_run_logger
+
+from viadot.orchestration.prefect.utils import shell_run_command
+
+
+@task(retries=0, timeout_seconds=2 * 60 * 60)
+async def dbt_task(
+    command: str = "run",
+    project_path: str | None = None,
+    env: dict[str, Any] | None = None,
+    shell: str = "bash",
+    return_all: bool = False,
+    stream_level: int = logging.INFO,
+    raise_on_failure: bool = True,
+) -> list[str] | str:
+    """Runs dbt commands within a shell.
+
+    Args:
+        command: dbt command to be executed; can also be provided post-initialization
+            by calling this task instance.
+        project_path: The path to the dbt project.
+        env: Dictionary of environment variables to use for the subprocess; can also be
+            provided at runtime.
+        shell: Shell to run the command with.
+        return_all: Whether this task should return all lines of stdout as a list, or
+            just the last line as a string.
+        stream_level: The logging level of the stream; defaults to 20, equivalent to
+            `logging.INFO`.
+        raise_on_failure: Whether to fail the task if the command fails.
+
+    Returns:
+        If `return_all` is True, all lines of stdout as a list; otherwise, the last
+        line as a string.
+
+    Example:
+        Executes `dbt run` on a specified dbt project.
+        ```python
+        from prefect import flow
+        from viadot.tasks import dbt_task
+
+        PROJECT_PATH = "/home/viadot/dbt/my_dbt_project"
+
+        @flow
+        def example_dbt_task_flow():
+            return dbt_task(
+                command="run", project_path=PROJECT_PATH, return_all=True
+            )
+
+        example_dbt_task_flow()
+        ```
+    """
+    logger = get_run_logger()
+
+    project_path = os.path.expandvars(project_path) if project_path is not None else "."
+ + return await shell_run_command( + command=f"dbt {command}", + env=env, + helper_command=f"cd {project_path}", + shell=shell, + return_all=return_all, + stream_level=stream_level, + raise_on_failure=raise_on_failure, + logger=logger, + ) diff --git a/src/viadot/orchestration/prefect/tasks/duckdb.py b/src/viadot/orchestration/prefect/tasks/duckdb.py new file mode 100644 index 000000000..314a3e450 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/duckdb.py @@ -0,0 +1,52 @@ +"""Tasks for running query in DuckDB.""" + +from typing import Any, Literal + +from prefect import task +from prefect.logging import get_run_logger + +from viadot.config import get_source_credentials +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import DuckDB +from viadot.sources.base import Record + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60) +def duckdb_query( + query: str, + fetch_type: Literal["record", "dataframe"] = "record", + # Specifying credentials in a dictionary is not recommended in viadot tasks, + # but in this case credentials can include only database name. + credentials: dict[str, Any] | None = None, + credentials_secret: str | None = None, + config_key: str | None = None, +) -> list[Record] | bool: + """Run query on a DuckDB database. + + Args: + query (str, required): The query to execute on the DuckDB database. + fetch_type (str, optional): In which form the data should be returned. + Defaults to "record". + credentials (dict[str, Any], optional): Credentials to the Database. Defaults to + None. + credentials_secret (str, optional): The name of the secret storing credentials + to the database. Defaults to None. More info on: + https://docs.prefect.io/concepts/blocks/ + config_key (str, optional): The key in the viadot config holding relevant + credentials to the database. Defaults to None. 
+ """ + if not (credentials or credentials_secret or config_key): + raise MissingSourceCredentialsError + + logger = get_run_logger() + + credentials = ( + credentials + or get_source_credentials(config_key) + or get_credentials(credentials_secret) + ) + duckdb = DuckDB(credentials=credentials) + result = duckdb.run_query(query=query, fetch_type=fetch_type) + logger.info("Query has been executed successfully.") + return result diff --git a/src/viadot/orchestration/prefect/tasks/exchange_rates.py b/src/viadot/orchestration/prefect/tasks/exchange_rates.py new file mode 100644 index 000000000..1f5b63103 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/exchange_rates.py @@ -0,0 +1,86 @@ +"""Tasks for interacting with the Exchange Rates API.""" + +from datetime import datetime +from typing import Any, Literal + +import pandas as pd +from prefect import task + +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import ExchangeRates + + +Currency = Literal[ + "USD", "EUR", "GBP", "CHF", "PLN", "DKK", "COP", "CZK", "SEK", "NOK", "ISK" +] + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60) +def exchange_rates_to_df( + currency: Currency = "USD", + credentials_secret: str | None = None, + credentials: dict[str, Any] | None = None, + config_key: str | None = None, + start_date: str = datetime.today().strftime("%Y-%m-%d"), + end_date: str = datetime.today().strftime("%Y-%m-%d"), + symbols: list[str] | None = None, + tests: dict | None = None, +) -> pd.DataFrame: + """Loads exchange rates from the Exchange Rates API into a pandas DataFrame. + + Args: + currency (Currency, optional): Base currency to which prices of searched + currencies are related. Defaults to "USD". + credentials_secret (str, optional): The name of the secret storing + the credentials. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + credentials (dict[str, str], optional): The credentials as a dictionary. + Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. + Defaults to None. + start_date (str, optional): Initial date for data search. + Data range is start_date -> end_date, + supported format 'yyyy-mm-dd'. + Defaults to datetime.today().strftime("%Y-%m-%d"). + end_date (str, optional): See above. + Defaults to datetime.today().strftime("%Y-%m-%d"). + symbols (list[str], optional): List of ISO codes of currencies for which + exchange rates from base currency will be fetched. Defaults to + ["USD","EUR","GBP","CHF","PLN","DKK","COP","CZK","SEK","NOK","ISK"]. + tests (dict[str], optional): A dictionary with optional list of tests + to verify the output dataframe. If defined, triggers the `validate` function + from viadot.utils. Defaults to None. + + Returns: + pd.DataFrame: The pandas `DataFrame` containing data from the file. 
+ """ + if not (credentials_secret or config_key or credentials): + raise MissingSourceCredentialsError + + if not symbols: + symbols = [ + "USD", + "EUR", + "GBP", + "CHF", + "PLN", + "DKK", + "COP", + "CZK", + "SEK", + "NOK", + "ISK", + ] + + credentials = credentials or get_credentials(credentials_secret) + e = ExchangeRates( + currency=currency, + start_date=start_date, + end_date=end_date, + symbols=symbols, + credentials=credentials, + config_key=config_key, + ) + return e.to_df(tests=tests) diff --git a/src/viadot/orchestration/prefect/tasks/genesys.py b/src/viadot/orchestration/prefect/tasks/genesys.py new file mode 100644 index 000000000..f248e826c --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/genesys.py @@ -0,0 +1,108 @@ +"""Task for downloading data from Genesys Cloud API.""" + +from typing import Any + +import pandas as pd +from prefect import get_run_logger, task + +from viadot.exceptions import APIError +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import Genesys + + +@task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=2 * 60 * 60) +def genesys_to_df( # noqa: PLR0913 + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + verbose: bool | None = None, + endpoint: str | None = None, + environment: str = "mypurecloud.de", + queues_ids: list[str] | None = None, + view_type: str | None = None, + view_type_time_sleep: int | None = None, + post_data_list: list[dict[str, Any]] | None = None, + normalization_sep: str = ".", + drop_duplicates: bool = False, + validate_df_dict: dict[str, Any] | None = None, +) -> pd.DataFrame: + """Task to download data from Genesys Cloud API. + + Args: + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret where credentials are stored. Defaults to None. + verbose (bool, optional): Increase the details of the logs printed on the + screen. Defaults to False. + endpoint (Optional[str], optional): Final end point to the API. + Defaults to None. + environment (str, optional): the domain that appears for Genesys Cloud + Environment based on the location of your Genesys Cloud organization. + Defaults to "mypurecloud.de". + queues_ids (Optional[List[str]], optional): List of queues ids to consult the + members. Defaults to None. + view_type (Optional[str], optional): The type of view export job to be created. + Defaults to None. + view_type_time_sleep (Optional[int], optional): Waiting time to retrieve data + from Genesys Cloud API. Defaults to None. + post_data_list (Optional[List[Dict[str, Any]]], optional): List of string + templates to generate json body in POST calls to the API. Defaults to None. + normalization_sep (str, optional): Nested records will generate names separated + by sep. Defaults to ".". + drop_duplicates (bool, optional): Remove duplicates from the DataFrame. + Defaults to False. + validate_df_dict (Optional[Dict[str, Any]], optional): A dictionary with + optional list of tests to verify the output dataframe. Defaults to None. 
+ + Examples: + data_frame = genesys_to_df( + config_key=config_key, + azure_key_vault_secret=azure_key_vault_secret, + verbose=verbose, + endpoint=endpoint, + environment=environment, + queues_ids=queues_ids, + view_type=view_type, + view_type_time_sleep=view_type_time_sleep, + post_data_list=post_data_list, + normalization_sep=normalization_sep, + validate_df_dict=validate_df_dict, + ) + + Returns: + pd.DataFrame: The response data as a pandas DataFrame. + """ + logger = get_run_logger() + + if not (azure_key_vault_secret or config_key): + raise MissingSourceCredentialsError + + if not config_key: + credentials = get_credentials(azure_key_vault_secret) + + if endpoint is None: + msg = "The API endpoint parameter was not defined." + raise APIError(msg) + + genesys = Genesys( + credentials=credentials, + config_key=config_key, + verbose=verbose, + environment=environment, + ) + logger.info("running `api_connection` method:\n") + genesys.api_connection( + endpoint=endpoint, + queues_ids=queues_ids, + view_type=view_type, + view_type_time_sleep=view_type_time_sleep, + post_data_list=post_data_list, + normalization_sep=normalization_sep, + ) + logger.info("running `to_df` method:\n") + + return genesys.to_df( + drop_duplicates=drop_duplicates, + validate_df_dict=validate_df_dict, + ) diff --git a/src/viadot/orchestration/prefect/tasks/git.py b/src/viadot/orchestration/prefect/tasks/git.py new file mode 100644 index 000000000..f55e5672c --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/git.py @@ -0,0 +1,68 @@ +"""Tasks for interacting with git repositories.""" + +import logging +import shutil +from typing import Any + +from prefect import get_run_logger, task +import pygit2 + +from viadot.orchestration.prefect.utils import get_credentials + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 10) +def clone_repo( + url: str, + token: str | None = None, + token_secret: str | None = None, + logger: logging.Logger | None = None, + **kwargs: dict[str, Any] | None, +) -> None: + """Clone Azure DevOps or GitHub repository. + + Args: + url (str): Alternatively to URL + token or URL + token_secret, you can also + provide the full URL (including token, if the repo is private) here. + token (str, optional): The token to use to clone the repo. + token_secret (str, optional): The secret holding the token. + logger (logging.Logger): The logger to use. By default, Prefect's task run + logger is used. + token (str, optional): The personal access token. + token_secret (str, optional): The name of the secret storing the token. Defaults + to None. + logger (logging.Logger, optional): The logger to use for logging the task's + output. By default, Prefect's task run logger. + kwargs (dict): Keyword arguments to be passed to `pygit2.clone_repository()`. + **kwargs (dict, optional): Keyword arguments to pass to + `pygit2.clone_repository()`. 
+ + Examples: + Azure DevOps (public repo): + https://dev.azure.com/{organization_name}/{project_name}/_git/{repo_name} + Azure DevOps (private repo): + https://{token}@dev.azure.com/{organization_name}/{project_name}/_git/{repo_name} + GitHub (public repo): https://github.com/{organization_name}/{repo_name}.git + GitHub (private repo): https://{token}@github.com/{organization_name}/{repo_name}.git + """ + if not logger: + logger = get_run_logger() + + url = url.strip("/") + + if token_secret: + token = get_credentials(token_secret) + + if token: + url = url.replace("https://dev.azure.com", f"https://{token}@dev.azure.com") + url = url.replace("https://github.com", f"https://{token}@github.com") + + repo_name = url.split("/")[-1].replace(".git", "") + path = kwargs.get("path") or repo_name + kwargs["path"] = path + + logger.info(f"Removing {path}...") + shutil.rmtree(path, ignore_errors=True) # Delete folder on run + + logger.info(f"Cloning repo '{repo_name}' into {path}...") + pygit2.clone_repository(url, **kwargs) + logger.info(f"Repo '{repo_name}' has been successfully cloned into {path}.") diff --git a/src/viadot/orchestration/prefect/tasks/hubspot.py b/src/viadot/orchestration/prefect/tasks/hubspot.py new file mode 100644 index 000000000..012caee5f --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/hubspot.py @@ -0,0 +1,70 @@ +"""Task for downloading data from Hubspot API to a pandas DataFrame.""" + +from typing import Any + +import pandas as pd +from prefect import task + +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import Hubspot + + +@task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) +def hubspot_to_df( + endpoint: str, + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + filters: list[dict[str, Any]] | None = None, + properties: list[Any] | None = None, + nrows: int = 1000, +) -> pd.DataFrame: + """Task to download data from Hubspot API to a pandas DataFrame. + + Args: + endpoint (str): API endpoint for an individual request. + config_key (Optional[str], optional): The key in the viadot config holding + relevant credentials. Defaults to None. + azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret where credentials are stored. Defaults to None. + filters (Optional[List[Dict[str, Any]]], optional): Filters defined for the API + body in specific order. Defaults to None. + properties (Optional[List[Any]], optional): List of user-defined columns to be + pulled from the API. Defaults to None. + nrows (int, optional): Max number of rows to pull during execution. + Defaults to 1000. + + Examples: + data_frame = hubspot_to_df( + config_key=config_key, + azure_key_vault_secret=azure_key_vault_secret, + endpoint=endpoint, + filters=filters, + properties=properties, + nrows=nrows, + ) + + Raises: + MissingSourceCredentialsError: If no credentials have been provided. + + Returns: + pd.DataFrame: The response data as a pandas DataFrame. 
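A hedged sketch for the `clone_repo` task defined above; the repository URL and the `github-token` secret name are placeholders. `path` is forwarded to `pygit2.clone_repository()` via `kwargs`:

```python
from prefect import flow

from viadot.orchestration.prefect.tasks import clone_repo


@flow
def clone_repo_example():
    # Placeholder repo URL and secret name; the task injects the resolved token
    # into the URL before cloning and removes any pre-existing local copy.
    clone_repo(
        url="https://github.com/my_org/my_private_repo.git",
        token_secret="github-token",
        path="my_private_repo",
    )
```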
+ """ + if not (azure_key_vault_secret or config_key): + raise MissingSourceCredentialsError + + if not config_key: + credentials = get_credentials(azure_key_vault_secret) + + hubspot = Hubspot( + credentials=credentials, + config_key=config_key, + ) + hubspot.api_connection( + endpoint=endpoint, + filters=filters, + properties=properties, + nrows=nrows, + ) + + return hubspot.to_df() diff --git a/src/viadot/orchestration/prefect/tasks/luma.py b/src/viadot/orchestration/prefect/tasks/luma.py new file mode 100644 index 000000000..34404ed05 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/luma.py @@ -0,0 +1,90 @@ +"""Tasks for interacting with [Luma](https://github.com/dyvenia/luma).""" + +import logging +import os +from pathlib import Path +from typing import Any, Literal + +from prefect import task +from prefect.logging import get_run_logger + +from viadot.orchestration.prefect.utils import shell_run_command + + +@task(retries=2, retry_delay_seconds=5, timeout_seconds=60 * 10) +async def luma_ingest_task( # noqa: PLR0913 + metadata_dir_path: str | Path, + luma_url: str = "http://localhost:8000", + metadata_kind: Literal["model", "model_run"] = "model", + follow: bool = False, + env: dict[str, Any] | None = None, + shell: str = "bash", + return_all: bool = True, + stream_level: int = logging.INFO, + max_retries: int = 3, # noqa: ARG001 + logger: logging.Logger | None = None, + raise_on_failure: bool = True, +) -> list[str]: + """Runs Luma ingestion by sending dbt artifacts to Luma ingestion API. + + Args: + metadata_dir_path: The path to the directory containing metadata files. + In the case of dbt, it's dbt project's `target` directory, + which contains dbt artifacts (`sources.json`, `catalog.json`, + `manifest.json`, and `run_results.json`). + luma_url: The URL of the Luma instance to ingest into. + metadata_kind: The kind of metadata to ingest. Either `model` or `model_run`. + follow: Whether to follow the ingestion process until it's completed (by + default, ingestion request is sent without awaiting for the response). By + default, `False`. + env: Dictionary of environment variables to use for + the subprocess; can also be provided at runtime. + shell: Shell to run the command with. + return_all: Whether this task should return all lines of stdout as a list, + or just the last line as a string. + stream_level: The logging level of the stream; + defaults to 20; equivalent to `logging.INFO`. + max_retries: The maximum number of times to retry the task. Defaults to 3. + logger: The logger to use for logging the task's output. By default, the + Prefect's task run logger. + raise_on_failure: Whether to raise an exception if the command fails. + + Returns: + list[str]: Lines from stdout as a list. 
+ + Example: + ```python + from prefect import flow + from viadot.tasks import luma_ingest_task + + metadata_dir_path = "${HOME}/dbt/my_dbt_project/target" + + @flow + def example_luma_ingest_flow(): + return luma_ingest_task(metadata_dir_path=metadata_dir_path) + + example_luma_ingest_flow() + ``` + """ + if not logger: + logger = get_run_logger() + + if isinstance(metadata_dir_path, str): + path_expanded = os.path.expandvars(metadata_dir_path) + metadata_dir_path = Path(path_expanded) + + luma_command = "ingest" if metadata_kind == "model" else "send-test-results" + follow_flag = "--follow" if follow else "" + command = ( + f"luma dbt {luma_command} -m {metadata_dir_path} -l {luma_url} {follow_flag}" + ) + + return await shell_run_command( + command=command, + env=env, + shell=shell, + return_all=return_all, + stream_level=stream_level, + logger=logger, + raise_on_failure=raise_on_failure, + ) diff --git a/src/viadot/orchestration/prefect/tasks/mindful.py b/src/viadot/orchestration/prefect/tasks/mindful.py new file mode 100644 index 000000000..2093490ae --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/mindful.py @@ -0,0 +1,79 @@ +"""Task to download data from Mindful API into a Pandas DataFrame.""" + +from datetime import date +from typing import Literal + +import pandas as pd +from prefect import get_run_logger, task + +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import Mindful + + +@task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) +def mindful_to_df( + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + region: Literal["us1", "us2", "us3", "ca1", "eu1", "au1"] = "eu1", + endpoint: str | None = None, + date_interval: list[date] | None = None, + limit: int = 1000, +) -> pd.DataFrame: + """Task to download data from Mindful API. + + Args: + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret where credentials are stored. Defaults to None. + region (Literal[us1, us2, us3, ca1, eu1, au1], optional): Survey Dynamix region + from where to interact with the mindful API. Defaults to "eu1" English + (United Kingdom). + endpoint (Optional[str], optional): API endpoint for an individual request. + Defaults to None. + date_interval (Optional[List[date]], optional): Date time range detailing the + starting date and the ending date. If no range is passed, one day of data + since this moment will be retrieved. Defaults to None. + limit (int, optional): The number of matching interactions to return. + Defaults to 1000. + + Examples: + data_frame = mindful_to_df( + config_key=config_key, + azure_key_vault_secret=azure_key_vault_secret, + region=region, + endpoint=end, + date_interval=date_interval, + limit=limit, + ) + + Returns: + pd.DataFrame: The response data as a pandas DataFrame. + """ + logger = get_run_logger() + + if not (azure_key_vault_secret or config_key): + raise MissingSourceCredentialsError + + if not config_key: + credentials = get_credentials(azure_key_vault_secret) + + if endpoint is None: + logger.warning( + "The API endpoint parameter was not defined. The default value is 'surveys'." 
+ ) + endpoint = "surveys" + + mindful = Mindful( + credentials=credentials, + config_key=config_key, + region=region, + ) + mindful.api_connection( + endpoint=endpoint, + date_interval=date_interval, + limit=limit, + ) + + return mindful.to_df() diff --git a/src/viadot/orchestration/prefect/tasks/minio.py b/src/viadot/orchestration/prefect/tasks/minio.py new file mode 100644 index 000000000..0c9d047f6 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/minio.py @@ -0,0 +1,58 @@ +"""Task for uploading pandas DataFrame to MinIO.""" + +import contextlib +from typing import Literal + +import pandas as pd +from prefect import task +from prefect.logging import get_run_logger + + +with contextlib.suppress(ImportError): + from viadot.sources import MinIO + +from viadot.config import get_source_credentials +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60) +def df_to_minio( + df: pd.DataFrame, + path: str, + credentials_secret: str | None = None, + config_key: str | None = None, + basename_template: str | None = None, + if_exists: Literal["error", "delete_matching", "overwrite_or_ignore"] = "error", +) -> None: + """Task for uploading the contents of a pandas DataFrame to MinIO. + + Args: + df (pd.DataFrame): Pandas dataframe to be uploaded. + path (str): Path to the MinIO file/folder. + credentials_secret (str, optional): The name of the secret storing + the credentials. Defaults to None. More info on: + https://docs.prefect.io/concepts/blocks/ + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + basename_template (str, optional): A template string used to generate + basenames of written data files. The token '{i}' will be replaced with + an automatically incremented integer. Defaults to None. + if_exists (Literal["error", "delete_matching", "overwrite_or_ignore"], + optional). What to do if the dataset already exists. Defaults to "error". + """ + if not (credentials_secret or config_key): + raise MissingSourceCredentialsError + + logger = get_run_logger() + + credentials = get_source_credentials(config_key) or get_credentials( + credentials_secret + ) + minio = MinIO(credentials=credentials) + + minio.from_df( + df=df, path=path, if_exists=if_exists, basename_template=basename_template + ) + + logger.info("Data has been uploaded successfully.") diff --git a/src/viadot/orchestration/prefect/tasks/outlook.py b/src/viadot/orchestration/prefect/tasks/outlook.py new file mode 100644 index 000000000..20b6a75e0 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/outlook.py @@ -0,0 +1,83 @@ +"""Task to download data from Outlook API into a Pandas DataFrame.""" + +import pandas as pd +from prefect import task + +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import Outlook + + +@task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) +def outlook_to_df( + mailbox_name: str, + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + request_retries: int = 10, + start_date: str | None = None, + end_date: str | None = None, + limit: int = 10000, + address_limit: int = 8000, + outbox_list: list[str] | None = None, +) -> pd.DataFrame: + """Task for downloading data from Outlook API to a pandas DataFrame. 
+ + Args: + mailbox_name (str): Mailbox name. + config_key (Optional[str], optional): The key in the viadot config holding + relevant credentials. Defaults to None. + azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret where credentials are stored. Defaults to None. + request_retries (int, optional): How many times retries to authorizate. + Defaults to 10. + start_date (Optional[str], optional): A filtering start date parameter e.g. + "2022-01-01". Defaults to None. + end_date (Optional[str], optional): A filtering end date parameter e.g. + "2022-01-02". Defaults to None. + limit (int, optional): Number of fetched top messages. Defaults to 10000. + address_limit (int, optional): The maximum number of accepted characters in the + sum of all email names. Defaults to 8000. + outbox_list (List[str], optional): List of outbox folders to differentiate + between Inboxes and Outboxes. Defaults to ["Sent Items"]. + + Examples: + data_frame = mindful_to_df( + config_key=config_key, + azure_key_vault_secret=azure_key_vault_secret, + region=region, + endpoint=end, + date_interval=date_interval, + limit=limit, + ) + + Raises: + MissingSourceCredentialsError: If none credentials have been provided. + APIError: The mailbox name is a "must" requirement. + + Returns: + pd.DataFrame: The response data as a pandas DataFrame. + """ + if not (azure_key_vault_secret or config_key): + raise MissingSourceCredentialsError + + if not config_key: + credentials = get_credentials(azure_key_vault_secret) + + if not outbox_list: + outbox_list = ["Sent Items"] + + outlook = Outlook( + credentials=credentials, + config_key=config_key, + ) + outlook.api_connection( + mailbox_name=mailbox_name, + request_retries=request_retries, + start_date=start_date, + end_date=end_date, + limit=limit, + address_limit=address_limit, + outbox_list=outbox_list, + ) + + return outlook.to_df() diff --git a/src/viadot/orchestration/prefect/tasks/redshift_spectrum.py b/src/viadot/orchestration/prefect/tasks/redshift_spectrum.py new file mode 100644 index 000000000..248ab8e76 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/redshift_spectrum.py @@ -0,0 +1,75 @@ +"""Task for uploading pandas DataFrame to AWS Redshift Spectrum.""" + +import contextlib +from typing import Any, Literal + +import pandas as pd +from prefect import task +from prefect.logging import get_run_logger + + +with contextlib.suppress(ImportError): + from viadot.sources import RedshiftSpectrum + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60) +def df_to_redshift_spectrum( # noqa: PLR0913 + df: pd.DataFrame, + to_path: str, + schema_name: str, + table: str, + extension: str = ".parquet", + if_exists: Literal["overwrite", "append"] = "overwrite", + partition_cols: list[str] | None = None, + index: bool = False, + compression: str | None = None, + sep: str = ",", + description: str | None = None, + config_key: str | None = None, + **kwargs: dict[str, Any] | None, +) -> None: + """Task to upload a pandas `DataFrame` to a csv or parquet file. + + Args: + df (pd.DataFrame): The Pandas DataFrame to ingest into Redshift Spectrum. + to_path (str): Path to a S3 folder where the table will be located. If needed, + a bottom-level directory named f"{table}" is automatically created, so + that files are always located in a folder named the same as the table. + schema_name (str): AWS Glue catalog database name. + table (str): AWS Glue catalog table name. 
+ partition_cols (list[str]): List of column names that will be used to create + partitions. Only takes effect if dataset=True. + extension (str): Required file type. Accepted file formats are 'csv' and + 'parquet'. + if_exists (str, optional): 'overwrite' to recreate any possible existing table + or 'append' to keep any possible existing table. Defaults to overwrite. + partition_cols (list[str], optional): List of column names that will be used to + create partitions. Only takes effect if dataset=True. Defaults to None. + index (bool, optional): Write row names (index). Defaults to False. + compression (str, optional): Compression style (None, snappy, gzip, zstd). + sep (str, optional): Field delimiter for the output file. Applies only to '.csv' + extension. Defaults to ','. + description (str, optional): AWS Glue catalog table description. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + kwargs: The parameters to pass in awswrangler to_parquet/to_csv function. + """ + rs = RedshiftSpectrum(config_key=config_key) + + rs.from_df( + df=df, + to_path=to_path, + schema=schema_name, + table=table, + extension=extension, + if_exists=if_exists, + partition_cols=partition_cols, + index=index, + compression=compression, + sep=sep, + description=description, + **kwargs, + ) + + logger = get_run_logger() + logger.info("Data has been uploaded successfully.") diff --git a/src/viadot/orchestration/prefect/tasks/s3.py b/src/viadot/orchestration/prefect/tasks/s3.py new file mode 100644 index 000000000..9816ab595 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/s3.py @@ -0,0 +1,57 @@ +"""Task for uploading pandas DataFrame to Amazon S3.""" + +import contextlib +from typing import Any + +from prefect import task +from prefect.logging import get_run_logger + + +with contextlib.suppress(ImportError): + from viadot.sources import S3 + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60) +def s3_upload_file( + from_path: str, + to_path: str, + credentials: dict[str, Any] | None = None, + config_key: str | None = None, +) -> None: + """Task to upload a file to Amazon S3. + + Args: + from_path (str): Path to local file(s) to be uploaded. + to_path (str): Path to the Amazon S3 file/folder. + credentials (dict[str, Any], optional): Credentials to the Amazon S3. + Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. 
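A hedged usage sketch for the `df_to_redshift_spectrum` task defined above; the S3 bucket, Glue database, and config key are placeholders:

```python
import pandas as pd
from prefect import flow

from viadot.orchestration.prefect.tasks import df_to_redshift_spectrum


@flow
def redshift_spectrum_example():
    # Bucket, Glue database, and config key are placeholders.
    df = pd.DataFrame({"id": [1, 2], "country": ["DE", "PL"]})
    df_to_redshift_spectrum(
        df=df,
        to_path="s3://my-bucket/spectrum/example_table",
        schema_name="my_glue_database",
        table="example_table",
        extension=".parquet",
        if_exists="overwrite",
        partition_cols=["country"],
        config_key="redshift_spectrum",
    )
```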
+
+    Example:
+        ```python
+        from prefect_viadot.tasks import s3_upload_file
+        from prefect import flow
+
+        @flow
+        def test_flow():
+            s3_upload_file(
+                from_path='test.parquet',
+                to_path="s3://bucket_name/test.parquet",
+                credentials={
+                    'profile_name': 'your_profile',
+                    'region_name': 'your_region',
+                    'aws_access_key_id': 'your_access_key_id',
+                    'aws_secret_access_key': 'your_secret_access_key',
+                }
+            )
+
+        test_flow()
+        ```
+    """
+    s3 = S3(credentials=credentials, config_key=config_key)
+
+    s3.upload(from_path=from_path, to_path=to_path)
+
+    logger = get_run_logger()
+    logger.info("Data has been uploaded successfully.")
diff --git a/src/viadot/orchestration/prefect/tasks/sap_rfc.py b/src/viadot/orchestration/prefect/tasks/sap_rfc.py
new file mode 100644
index 000000000..c162994f4
--- /dev/null
+++ b/src/viadot/orchestration/prefect/tasks/sap_rfc.py
@@ -0,0 +1,123 @@
+"""Task for interacting with SAP."""
+
+import contextlib
+from typing import Any
+
+import pandas as pd
+from prefect import task
+from prefect.logging import get_run_logger
+
+
+with contextlib.suppress(ImportError):
+    from viadot.sources import SAPRFC, SAPRFCV2
+from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError
+from viadot.orchestration.prefect.utils import get_credentials
+
+
+@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60 * 3)
+def sap_rfc_to_df(  # noqa: PLR0913
+    query: str | None = None,
+    sep: str | None = None,
+    func: str | None = None,
+    replacement: str = "-",
+    rfc_total_col_width_character_limit: int = 400,
+    rfc_unique_id: list[str] | None = None,
+    tests: dict[str, Any] | None = None,
+    credentials_secret: str | None = None,
+    credentials: dict[str, Any] | None = None,
+    config_key: str | None = None,
+    alternative_version: bool = False,
+) -> pd.DataFrame:
+    """A task for querying SAP with SQL using the RFC protocol.
+
+    Note that only a very limited subset of SQL is supported:
+    - aliases
+    - where clauses combined using the AND operator
+    - limit & offset
+
+    Unsupported:
+    - aggregations
+    - joins
+    - subqueries
+    - etc.
+
+    Args:
+        query (str): The query to be executed with pyRFC.
+        sep (str, optional): The separator to use when reading query results. If not
+            provided, multiple options are automatically tried. Defaults to None.
+        func (str, optional): SAP RFC function to use. Defaults to None.
+        replacement (str, optional): If the separator character appears inside a column
+            value, it is replaced with this character to avoid breaking the flow.
+            Defaults to "-".
+        rfc_total_col_width_character_limit (int, optional): Number of characters by
+            which query will be split in chunks in case of too many columns for RFC
+            function. According to SAP documentation, the limit is 512 characters.
+            However, we observed SAP raising an exception even on a slightly lower
+            number of characters, so we add a safety margin. Defaults to 400.
+        rfc_unique_id (list[str], optional): Reference columns used to merge chunked
+            DataFrames. These columns must be unique. If no columns are provided, all
+            DataFrame columns will be concatenated. Defaults to None.
+        tests (dict[str], optional): A dictionary with optional list of tests
+            to verify the output dataframe. If defined, triggers the `validate`
+            function from viadot.utils. Defaults to None.
+        credentials_secret (str, optional): The name of the secret that stores SAP
+            credentials. Defaults to None.
+            More info on: https://docs.prefect.io/concepts/blocks/
+        credentials (dict[str, Any], optional): Credentials to SAP.
+ Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + alternative_version (bool, optional): Enable the use version 2 in source. + Defaults to False. + + Examples: + sap_rfc_to_df( + ... + rfc_unique_id=["VBELN", "LPRIO"], + ... + ) + """ + if not (credentials_secret or credentials or config_key): + raise MissingSourceCredentialsError + + if query is None: + msg = "Please provide the query." + raise ValueError(msg) + + logger = get_run_logger() + + credentials = credentials or get_credentials(credentials_secret) + + if alternative_version is True: + if rfc_unique_id: + logger.warning( + "If the column/set are not unique the table will be malformed." + ) + sap = SAPRFCV2( + sep=sep, + replacement=replacement, + credentials=credentials, + func=func, + rfc_total_col_width_character_limit=rfc_total_col_width_character_limit, + rfc_unique_id=rfc_unique_id, + ) + else: + sap = SAPRFC( + sep=sep, + func=func, + rfc_total_col_width_character_limit=rfc_total_col_width_character_limit, + credentials=credentials, + config_key=config_key, + ) + sap.query(query) + logger.info("Downloading data from SAP to a DataFrame...") + logger.debug(f"Running query: \n{query}.") + + df = sap.to_df(tests=tests) + + if not df.empty: + logger.info("Data has been downloaded successfully.") + elif df.empty: + logger.warn("Task finished but NO data was downloaded.") + return df diff --git a/src/viadot/orchestration/prefect/tasks/sharepoint.py b/src/viadot/orchestration/prefect/tasks/sharepoint.py new file mode 100644 index 000000000..4aef9eca1 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/sharepoint.py @@ -0,0 +1,113 @@ +"""Tasks for interacting with Microsoft Sharepoint.""" + +from typing import Any + +import pandas as pd +from prefect import get_run_logger, task + +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import Sharepoint + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60) +def sharepoint_to_df( + url: str, + sheet_name: str | list[str | int] | int | None = None, + columns: str | list[str] | list[int] | None = None, + tests: dict[str, Any] | None = None, + file_sheet_mapping: dict | None = None, + na_values: list[str] | None = None, + credentials_secret: str | None = None, + config_key: str | None = None, +) -> pd.DataFrame: + """Load an Excel file stored on Microsoft Sharepoint into a pandas `DataFrame`. + + Modes: + If the `URL` ends with the file (e.g ../file.xlsx) it downloads only the file and + creates a DataFrame from it. + If the `URL` ends with the folder (e.g ../folder_name/): it downloads multiple files + and creates a DataFrame from them: + - If `file_sheet_mapping` is provided, it downloads and processes only + the specified files and sheets. + - If `file_sheet_mapping` is NOT provided, it downloads and processes all of + the files from the chosen folder. + + Args: + url (str): The URL to the file. + sheet_name (str | list | int, optional): Strings are used + for sheet names. Integers are used in zero-indexed sheet positions + (chart sheets do not count as a sheet position). Lists of strings/integers + are used to request multiple sheets. Specify None to get all worksheets. + Defaults to None. + columns (str | list[str] | list[int], optional): Which columns to ingest. + Defaults to None. + credentials_secret (str, optional): The name of the secret storing + the credentials. 
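For illustration, a sketch of the `sap_rfc_to_df` task defined above chained with `df_to_adls`. The SAP table and columns, the RFC function name, and the secret names are assumptions; only the limited SQL subset described in the docstring (aliases, AND-combined WHERE clauses, LIMIT/OFFSET) is supported:

```python
from prefect import flow

from viadot.orchestration.prefect.tasks import df_to_adls, sap_rfc_to_df


@flow
def sap_rfc_example():
    # Placeholder table, columns, function, and secret names.
    df = sap_rfc_to_df(
        query="SELECT MATNR, MAKTX FROM MAKT WHERE SPRAS = 'E' LIMIT 1000",
        func="RFC_READ_TABLE",
        rfc_unique_id=["MATNR"],
        credentials_secret="sap-credentials",
    )
    df_to_adls(
        df=df,
        path="raw/sap/makt.csv",
        credentials_secret="adls-credentials",
        overwrite=True,
    )
```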
Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + file_sheet_mapping (dict): A dictionary where keys are filenames and values are + the sheet names to be loaded from each file. If provided, only these files + and sheets will be downloaded. Defaults to None. + na_values (list[str] | None): Additional strings to recognize as NA/NaN. + If list passed, the specific NA values for each column will be recognized. + Defaults to None. + tests (dict[str], optional): A dictionary with optional list of tests + to verify the output dataframe. If defined, triggers the `validate` + function from viadot.utils. Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + + Returns: + pd.Dataframe: The pandas `DataFrame` containing data from the file. + """ + if not (credentials_secret or config_key): + raise MissingSourceCredentialsError + + logger = get_run_logger() + + credentials = get_credentials(secret_name=credentials_secret) + s = Sharepoint(credentials=credentials, config_key=config_key) + + logger.info(f"Downloading data from {url}...") + df = s.to_df( + url, + sheet_name=sheet_name, + tests=tests, + usecols=columns, + na_values=na_values, + file_sheet_mapping=file_sheet_mapping, + ) + logger.info(f"Successfully downloaded data from {url}.") + + return df + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60) +def sharepoint_download_file( + url: str, + to_path: str, + credentials_secret: str | None = None, + config_key: str | None = None, +) -> None: + """Download a file from Sharepoint. + + Args: + url (str): The URL of the file to be downloaded. + to_path (str): Where to download the file. + credentials_secret (str, optional): The name of the secret that stores + Sharepoint credentials. Defaults to None. + credentials (SharepointCredentials, optional): Sharepoint credentials. + config_key (str, optional): The key in the viadot config holding relevant + credentials. 
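A sketch of the folder mode of `sharepoint_to_df` described above, where `file_sheet_mapping` restricts which files and sheets are loaded; the folder URL, file names, and secret name are placeholders:

```python
from prefect import flow

from viadot.orchestration.prefect.tasks import sharepoint_to_df


@flow
def sharepoint_folder_example():
    # Placeholder folder URL, file names, and secret name.
    return sharepoint_to_df(
        url="https://example.sharepoint.com/sites/finance/Shared%20Documents/reports/",
        file_sheet_mapping={
            "january.xlsx": "Summary",
            "february.xlsx": "Summary",
        },
        na_values=["N/A", "-"],
        credentials_secret="sharepoint-credentials",
    )
```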
+ """ + if not (credentials_secret or config_key): + raise MissingSourceCredentialsError + + logger = get_run_logger() + + credentials = get_credentials(secret_name=credentials_secret) + s = Sharepoint(credentials=credentials, config_key=config_key) + + logger.info(f"Downloading data from {url}...") + s.download_file(url=url, to_path=to_path) + logger.info(f"Successfully downloaded data from {url}.") diff --git a/src/viadot/orchestration/prefect/tasks/sql_server.py b/src/viadot/orchestration/prefect/tasks/sql_server.py new file mode 100644 index 000000000..93ae51449 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/sql_server.py @@ -0,0 +1,140 @@ +"""Prefect tasks for working with SQL Server.""" + +from typing import Any, Literal + +import pandas as pd +from prefect import task +from prefect.logging import get_run_logger + +from viadot.config import get_source_credentials +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources.base import Record +from viadot.sources.sql_server import SQLServer + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60 * 3) +def create_sql_server_table( + schema: str | None = None, + table: str | None = None, + dtypes: dict[str, Any] | None = None, + if_exists: Literal["fail", "replace", "skip", "delete"] = "fail", + credentials_secret: str | None = None, + credentials: dict[str, Any] | None = None, + config_key: str | None = None, +) -> None: + """Create a table in SQL Server. + + Args: + schema (str, optional): Destination schema. + table (str, optional): Destination table. + dtypes (dict[str, Any], optional): Data types to enforce. + if_exists (Literal, optional): What to do if the table already exists. + credentials (dict[str, Any], optional): Credentials to the SQLServer. + Defaults to None. + credentials_secret (str, optional): The name of the secret storing + the credentials. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + """ + if not (credentials_secret or credentials or config_key): + raise MissingSourceCredentialsError + + logger = get_run_logger() + + credentials = ( + credentials + or get_credentials(credentials_secret) + or get_source_credentials(config_key) + ) + sql_server = SQLServer(credentials=credentials) + + fqn = f"{schema}.{table}" if schema is not None else table + created = sql_server.create_table( + schema=schema, table=table, dtypes=dtypes, if_exists=if_exists + ) + if created: + logger.info(f"Successfully created table {fqn}.") + else: + logger.info( + f"Table {fqn} has not been created as if_exists is set to {if_exists}." + ) + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60 * 3) +def sql_server_to_df( + query: str, + credentials_secret: str | None = None, + credentials: dict[str, Any] | None = None, + config_key: str | None = None, +) -> pd.DataFrame: + """Execute a query and load the result into a pandas DataFrame. + + Args: + query (str, required): The query to execute on the SQL Server database. + If the query doesn't start with "SELECT" returns an empty DataFrame. + credentials (dict[str, Any], optional): Credentials to the SQLServer. + Defaults to None. + credentials_secret (str, optional): The name of the secret storing + the credentials. Defaults to None. 
+ More info on: https://docs.prefect.io/concepts/blocks/ + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + """ + if not (credentials_secret or credentials or config_key): + raise MissingSourceCredentialsError + + logger = get_run_logger() + + credentials = ( + credentials + or get_source_credentials(config_key) + or get_credentials(credentials_secret) + ) + sql_server = SQLServer(credentials=credentials) + df = sql_server.to_df(query=query) + nrows = df.shape[0] + ncols = df.shape[1] + + logger.info( + f"Successfully downloaded {nrows} rows and {ncols} columns of data to a DataFrame." + ) + return df + + +@task(retries=3, retry_delay_seconds=10, timeout_seconds=60 * 60 * 3) +def sql_server_query( + query: str, + credentials_secret: str | None = None, + credentials: dict[str, Any] | None = None, + config_key: str | None = None, +) -> list[Record] | bool: + """Execute a query on SQL Server. + + Args: + query (str, required): The query to execute on the SQL Server database. + credentials (dict[str, Any], optional): Credentials to the SQLServer. + Defaults to None. + credentials_secret (str, optional): The name of the secret storing + the credentials. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + + """ + if not (credentials_secret or credentials or config_key): + raise MissingSourceCredentialsError + + logger = get_run_logger() + + credentials = ( + credentials + or get_source_credentials(config_key) + or get_credentials(credentials_secret) + ) + sql_server = SQLServer(credentials=credentials) + result = sql_server.run(query) + + logger.info("Successfully ran the query.") + return result diff --git a/src/viadot/orchestration/prefect/tasks/task_utils.py b/src/viadot/orchestration/prefect/tasks/task_utils.py new file mode 100644 index 000000000..28e201ea1 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/task_utils.py @@ -0,0 +1,244 @@ +"""Utility tasks.""" + +import json +from pathlib import Path +from typing import Any, Literal + +import pandas as pd +from prefect import task +from prefect.logging import get_run_logger +from visions.functional import infer_type +from visions.typesets.complete_set import CompleteSet + + +@task +def dtypes_to_json_task(dtypes_dict: dict[str, Any], local_json_path: str) -> None: + """Creates json file from a dictionary. + + Args: + dtypes_dict (dict): Dictionary containing data types. + local_json_path (str): Path to local json file. + """ + with Path(local_json_path).open("w") as fp: + json.dump(dtypes_dict, fp) + + +@task +def chunk_df(df: pd.DataFrame, size: int = 10_000) -> list[pd.DataFrame]: + """Chunks a data frame into multiple smaller data frames of a specified size. + + Args: + df (pd.DataFrame): Input pandas DataFrame. + size (int, optional): Size of a chunk. Defaults to 10000. + """ + n_rows = df.shape[0] + return [df[i : i + size] for i in range(0, n_rows, size)] + + +@task +def df_get_data_types_task(df: pd.DataFrame) -> dict: + """Returns dictionary containing datatypes of pandas DataFrame columns. + + Args: + df (pd.DataFrame): Input pandas DataFrame. 
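For context, a minimal sketch combining the three SQL Server tasks defined above; the schema, table, dtypes, and secret name are illustrative assumptions:

```python
from prefect import flow

from viadot.orchestration.prefect.tasks import (
    create_sql_server_table,
    sql_server_query,
    sql_server_to_df,
)


@flow
def sql_server_example():
    # Placeholder schema, table, dtypes, and secret name.
    create_sql_server_table(
        schema="staging",
        table="contacts",
        dtypes={"id": "INT", "name": "VARCHAR(100)"},
        if_exists="skip",
        credentials_secret="sql-server-credentials",
    )
    sql_server_query(
        query="TRUNCATE TABLE staging.contacts",
        credentials_secret="sql-server-credentials",
    )
    return sql_server_to_df(
        query="SELECT TOP 10 * FROM staging.contacts",
        credentials_secret="sql-server-credentials",
    )
```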
+ """ + typeset = CompleteSet() + dtypes = infer_type(df, typeset) + return {k: str(v) for k, v in dtypes.items()} + + +@task +def get_sql_dtypes_from_df(df: pd.DataFrame) -> dict: + """Obtain SQL data types from a pandas DataFrame.""" + typeset = CompleteSet() + dtypes = infer_type(df.head(10000), typeset) + dtypes_dict = {k: str(v) for k, v in dtypes.items()} + dict_mapping = { + "Float": "REAL", + "Image": None, + "Categorical": "VARCHAR(500)", + "Time": "TIME", + "Boolean": "VARCHAR(5)", # Bool is True/False, Microsoft expects 0/1 + "DateTime": "DATETIMEOFFSET", # DATETIMEOFFSET is the only timezone-aware dtype in TSQL + "Object": "VARCHAR(500)", + "EmailAddress": "VARCHAR(50)", + "File": None, + "Geometry": "GEOMETRY", + "Ordinal": "INT", + "Integer": "INT", + "Generic": "VARCHAR(500)", + "UUID": "VARCHAR(50)", # Microsoft uses a custom UUID format so we can't use it + "Complex": None, + "Date": "DATE", + "String": "VARCHAR(500)", + "IPAddress": "VARCHAR(39)", + "Path": "VARCHAR(255)", + "TimeDelta": "VARCHAR(20)", # datetime.datetime.timedelta; eg. '1 days 11:00:00' + "URL": "VARCHAR(255)", + "Count": "INT", + } + dict_dtypes_mapped = {} + for k in dtypes_dict: + dict_dtypes_mapped[k] = dict_mapping[dtypes_dict[k]] + + # This is required as pandas cannot handle mixed dtypes in Object columns + return { + k: ("String" if v == "Object" else str(v)) + for k, v in dict_dtypes_mapped.items() + } + + +@task +def df_map_mixed_dtypes_for_parquet( + df: pd.DataFrame, dtypes_dict: dict +) -> pd.DataFrame: + """Handle mixed dtypes in DataFrame columns. + + Mapping 'object' visions dtype to 'string' dtype to allow Pandas to_parquet + + Args: + dict_dtypes_mapped (dict): Data types dictionary, inferred by Visions. + df (pd.DataFrame): input DataFrame. + + Returns: + df_mapped (pd.DataFrame): Pandas DataFrame with mapped Data Types to workaround + Pandas to_parquet bug connected with mixed dtypes in object:. + """ + df_mapped = df.copy() + for col, dtype in dtypes_dict.items(): + if dtype == "Object": + df_mapped[col] = df_mapped[col].astype("string") + return df_mapped + + +@task +def update_dtypes_dict(dtypes_dict: dict) -> dict: + """Task to update dtypes_dictionary that will be stored in the schema. + + It's required due to workaround Pandas to_parquet bug connected with mixed dtypes in + object. + + Args: + dtypes_dict (dict): Data types dictionary inferred by Visions. + + Returns: + dtypes_dict_updated (dict): Data types dictionary updated to follow Pandas + requirements in to_parquet functionality. + """ + return {k: ("String" if v == "Object" else str(v)) for k, v in dtypes_dict.items()} + + +@task +def df_to_csv( + df: pd.DataFrame, + path: str, + sep: str = "\t", + if_exists: Literal["append", "replace", "skip"] = "replace", + **kwargs, +) -> None: + r"""Write data from a pandas DataFrame to a CSV file. + + Args: + df (pd.DataFrame): Input pandas DataFrame. + path (str): Path to output csv file. + sep (str, optional): The separator to use in the CSV. Defaults to "\t". + if_exists (Literal["append", "replace", "skip"], optional): What to do if the + table exists. Defaults to "replace". + """ + logger = get_run_logger() + + if Path(path).exists(): + if if_exists == "append": + existing_df = pd.read_csv(path, sep=sep) + out_df = pd.concat([existing_df, df]) + elif if_exists == "replace": + out_df = df + elif if_exists == "skip": + logger.info("Skipped.") + return + else: + out_df = df + + # Create directories if they don't exist. 
+ Path(path).mkdir(parents=True, exist_ok=True) + + out_df.to_csv(path, index=False, sep=sep, **kwargs) + + +@task +def df_to_parquet( + df: pd.DataFrame, + path: str, + if_exists: Literal["append", "replace", "skip"] = "replace", + **kwargs, +) -> None: + """Task to create parquet file based on pandas DataFrame. + + Args: + df (pd.DataFrame): Input pandas DataFrame. + path (str): Path to output parquet file. + if_exists (Literal["append", "replace", "skip"], optional): What to do if the + table exists. Defaults to "replace". + """ + logger = get_run_logger() + + if Path(path).exists(): + if if_exists == "append": + existing_df = pd.read_parquet(path) + out_df = pd.concat([existing_df, df]) + elif if_exists == "replace": + out_df = df + elif if_exists == "skip": + logger.info("Skipped.") + return + else: + out_df = df + + # Create directories if they don't exist. + Path(path).mkdir(parents=True, exist_ok=True) + + out_df.to_parquet(path, index=False, **kwargs) + + +@task +def union_dfs_task(dfs: list[pd.DataFrame]) -> pd.DataFrame: + """Create one DataFrame from a list of pandas DataFrames. + + Args: + dfs (List[pd.DataFrame]): List of pandas data frames to concat. In case of + different size of DataFrames NaN values can appear. + """ + return pd.concat(dfs, ignore_index=True) + + +@task +def df_clean_column( + df: pd.DataFrame, columns_to_clean: list[str] | None = None +) -> pd.DataFrame: + """Remove special characters from a pandas DataFrame. + + Args: + df (pd.DataFrame): The DataFrame to clean. + columns_to_clean (List[str]): A list of columns to clean. Defaults is None. + + Returns: + pd.DataFrame: The cleaned DataFrame + """ + df = df.copy() + + if columns_to_clean is None: + df.replace( + to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], + value=["", ""], + regex=True, + inplace=True, + ) + else: + for col in columns_to_clean: + df[col].replace( + to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], + value=["", ""], + regex=True, + inplace=True, + ) + return df diff --git a/src/viadot/orchestration/prefect/utils.py b/src/viadot/orchestration/prefect/utils.py new file mode 100644 index 000000000..91778f1e2 --- /dev/null +++ b/src/viadot/orchestration/prefect/utils.py @@ -0,0 +1,228 @@ +"""Common utilities for use in tasks.""" + +import contextlib +import json +from json.decoder import JSONDecodeError +import logging +import os +import sys +import tempfile +from typing import Any + +import anyio +from anyio import open_process +from anyio.streams.text import TextReceiveStream +from prefect.blocks.system import Secret +from prefect.client.orchestration import PrefectClient +from prefect.settings import PREFECT_API_KEY, PREFECT_API_URL + + +with contextlib.suppress(ModuleNotFoundError): + from prefect_aws.secrets_manager import AwsSecret +from prefect_sqlalchemy import DatabaseCredentials + +from viadot.orchestration.prefect.exceptions import MissingPrefectBlockError + + +with contextlib.suppress(ModuleNotFoundError): + from prefect_azure import AzureKeyVaultSecretReference + + +async def list_block_documents() -> list[Any]: + """Retrieve list of Prefect block documents.""" + async with PrefectClient( + api=PREFECT_API_URL.value(), api_key=PREFECT_API_KEY.value() + ) as client: + return await client.read_block_documents() + + +def _get_azure_credentials(secret_name: str) -> dict[str, Any]: + """Retrieve credentials from the Prefect 'AzureKeyVaultSecretReference' block. + + Args: + secret_name (str): The name of the secret to be retrieved. + + Returns: + dict: A dictionary containing the credentials. 
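A short usage sketch for the frame utilities above (`union_dfs_task` and `df_clean_column`); the flow name and sample data are illustrative only.

```python
import pandas as pd
from prefect import flow

from viadot.orchestration.prefect.tasks.task_utils import (
    df_clean_column,
    union_dfs_task,
)


@flow
def clean_and_combine() -> pd.DataFrame:
    messy = pd.DataFrame({"notes": ["line one\nline two", "tab\there"]})
    also_messy = pd.DataFrame({"notes": ["carriage\rreturn"]})

    combined = union_dfs_task(dfs=[messy, also_messy])
    # With columns_to_clean=None, every column is scrubbed of \t, \n and \r.
    return df_clean_column(df=combined)
```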
+ """ + try: + credentials = json.loads( + AzureKeyVaultSecretReference.load(secret_name).get_secret() + ) + except JSONDecodeError: + credentials = AzureKeyVaultSecretReference.load(secret_name).get_secret() + + return credentials + + +def _get_aws_credentials(secret_name: str) -> dict[str, Any] | str: + """Retrieve credentials from the Prefect 'AwsSecret' block document. + + Args: + secret_name (str): The name of the secret to be retrieved. + + Returns: + dict | str: A dictionary or a string containing the credentials. + """ + aws_secret_block = AwsSecret.load(secret_name) + credentials = aws_secret_block.read_secret() + return json.loads(credentials) + + +def _get_secret_credentials(secret_name: str) -> dict[str, Any] | str: + """Retrieve credentials from the Prefect 'Secret' block document. + + Args: + secret_name (str): The name of the secret to be retrieved. + + Returns: + dict | str: A dictionary or a string containing the credentials. + """ + secret = Secret.load(secret_name).get() + try: + credentials = json.loads(secret) + except json.JSONDecodeError: + credentials = secret + + return credentials + + +def _get_database_credentials(secret_name: str) -> dict[str, Any] | str: + """Retrieve credentials from the Prefect 'DatabaseCredentials' block document. + + Args: + secret_name (str): The name of the secret to be retrieved. + + Returns: + dict | str: A dictionary or a string containing the credentials. + """ + secret = DatabaseCredentials.load(name=secret_name).dict() + + credentials = secret + credentials["user"] = secret.get("username") + credentials["db_name"] = secret.get("database") + credentials["password"] = secret.get("password").get_secret_value() + if secret.get("port"): + credentials["server"] = secret.get("host") + "," + str(secret.get("port")) + else: + credentials["server"] = secret.get("host") + + return credentials + + +def get_credentials(secret_name: str) -> dict[str, Any]: + """Retrieve credentials from the Prefect block document. + + Args: + secret_name (str): The name of the secret to be retrieved. + + Returns: + dict: A dictionary containing the credentials. + """ + # Prefect does not allow upper case letters for blocks, + # so some names might be lowercased versions of the original + + secret_name_lowercase = secret_name.lower() + blocks = anyio.run(list_block_documents) + + for block in blocks: + if block.name == secret_name_lowercase: + block_type = block.block_schema.fields["title"] + break + else: + msg = "The provided secret name is not valid." + raise MissingPrefectBlockError(msg) + + if block_type == "AwsSecret": + credentials = _get_aws_credentials(secret_name) + elif block_type == "AzureKeyVaultSecretReference": + credentials = _get_azure_credentials(secret_name) + elif block_type == "DatabaseCredentials": + credentials = _get_database_credentials(secret_name) + elif block_type == "Secret": + credentials = _get_secret_credentials(secret_name) + else: + msg = f"The provided secret block type: {block_type} is not supported" + raise MissingPrefectBlockError(msg) + + return credentials + + +async def shell_run_command( + command: str, + env: dict[str, Any] | None = None, + helper_command: str | None = None, + shell: str = "bash", + return_all: bool = False, + stream_level: int = logging.INFO, + logger: logging.Logger | None = None, + raise_on_failure: bool = True, +) -> list[str] | str: + """Runs arbitrary shell commands as a util. + + Args: + command: Shell command to be executed; can also be + provided post-initialization by calling this task instance. 
+ env: Dictionary of environment variables to use for + the subprocess; can also be provided at runtime. + helper_command: String representing a shell command, which + will be executed prior to the `command` in the same process. + Can be used to change directories, define helper functions, etc. + for different commands in a flow. + shell: Shell to run the command with; defaults to "bash". + return_all: Whether this task should return all lines of stdout as a list, + or just the last line as a string; defaults to `False`. + stream_level: The logging level of the stream. + logger: Can pass a desired logger; if not passed, will automatically + gets a run logger from Prefect. + raise_on_failure: Whether to raise an exception if the command fails. + + Returns: + If return all, returns all lines as a list; else the last line as a string. + + Example: + Echo "hey it works". + ```python + from prefect_shell.utils import shell_run_command + await shell_run_command("echo hey it works") + ``` + """ + if logger is None: + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger("prefect_shell.utils") + + current_env = os.environ.copy() + current_env.update(env or {}) + + with tempfile.NamedTemporaryFile(prefix="prefect-") as tmp: + if helper_command: + tmp.write(helper_command.encode()) + tmp.write(os.linesep.encode()) + tmp.write(command.encode()) + tmp.flush() + + shell_command = [shell, tmp.name] + if sys.platform == "win32": + shell_command = " ".join(shell_command) + + lines = [] + async with await open_process(shell_command, env=env) as process: + async for text in TextReceiveStream(process.stdout): + logger.log(stream_level, text) + lines.extend(text.rstrip().split("\n")) + + await process.wait() + if process.returncode: + stderr = "\n".join( + [text async for text in TextReceiveStream(process.stderr)] + ) + if not stderr and lines: + stderr = f"{lines[-1]}\n" + msg = ( + f"Command failed with exit code {process.returncode}:\n" f"{stderr}" + ) + if raise_on_failure: + raise RuntimeError(msg) + lines.append(msg) + + return lines if return_all else lines[-1] diff --git a/src/viadot/signals.py b/src/viadot/signals.py index 9f3f418cd..9db26f9d8 100644 --- a/src/viadot/signals.py +++ b/src/viadot/signals.py @@ -1,2 +1,5 @@ -class SKIP(Exception): +"""Signals, used to control pipeline behavior.""" + + +class SKIP(Exception): # noqa: N818 pass diff --git a/src/viadot/sources/__init__.py b/src/viadot/sources/__init__.py index 0b2323c88..0d77c1bf2 100644 --- a/src/viadot/sources/__init__.py +++ b/src/viadot/sources/__init__.py @@ -1,33 +1,65 @@ +"""Source imports.""" + from importlib.util import find_spec -from viadot.sources.azure_data_lake import AzureDataLake -from viadot.sources.cloud_for_customers import CloudForCustomers -from viadot.sources.exchange_rates import ExchangeRates -from viadot.sources.genesys import Genesys -from viadot.sources.minio import MinIO -from viadot.sources.redshift_spectrum import RedshiftSpectrum -from viadot.sources.s3 import S3 -from viadot.sources.sharepoint import Sharepoint -from viadot.sources.trino import Trino +from .cloud_for_customers import CloudForCustomers +from .duckdb import DuckDB +from .exchange_rates import ExchangeRates +from .genesys import Genesys +from .hubspot import Hubspot +from .mindful import Mindful +from .outlook import Outlook +from .sharepoint import Sharepoint +from .sql_server import SQLServer +from .trino import Trino +from .uk_carbon_intensity import UKCarbonIntensity + __all__ = [ - "AzureDataLake", "CloudForCustomers", 
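`shell_run_command` is a plain coroutine rather than a task, so it can also be driven directly with `asyncio`; a small usage sketch, with arbitrary commands.

```python
import asyncio

from viadot.orchestration.prefect.utils import shell_run_command


async def main() -> None:
    # By default only the last stdout line is returned...
    last_line = await shell_run_command("echo 'hello from viadot'")
    print(last_line)

    # ...while return_all=True yields every line as a list.
    all_lines = await shell_run_command("ls -1 /tmp", return_all=True)
    print(all_lines)


if __name__ == "__main__":
    asyncio.run(main())
```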
"ExchangeRates", "Genesys", - "MinIO", - "RedshiftSpectrum", - "S3", + "Outlook", + "Hubspot", + "Mindful", "Sharepoint", "Trino", + "SQLServer", + "UKCarbonIntensity", ] +if find_spec("adlfs"): + from viadot.sources.azure_data_lake import AzureDataLake # noqa: F401 + + __all__.extend(["AzureDataLake"]) + +if find_spec("duckdb"): + from viadot.sources.duckdb import DuckDB # noqa: F401 + + __all__.extend(["DuckDB"]) + +if find_spec("redshift_connector"): + from viadot.sources.redshift_spectrum import RedshiftSpectrum # noqa: F401 + + __all__.extend(["RedshiftSpectrum"]) + +if find_spec("s3fs"): + from viadot.sources.s3 import S3 # noqa: F401 + + __all__.extend(["S3"]) + +if find_spec("s3fs"): + from viadot.sources.minio import MinIO # noqa: F401 + + __all__.extend(["MinIO"]) + + if find_spec("pyrfc"): - from viadot.sources.sap_rfc import SAPRFC, SAPRFCV2 # noqa + from viadot.sources.sap_rfc import SAPRFC, SAPRFCV2 # noqa: F401 __all__.extend(["SAPRFC", "SAPRFCV2"]) if find_spec("pyspark"): - from viadot.sources.databricks import Databricks # noqa + from viadot.sources.databricks import Databricks # noqa: F401 __all__.append("Databricks") diff --git a/src/viadot/sources/azure_data_lake.py b/src/viadot/sources/azure_data_lake.py index 9cdd75df3..53647c09f 100644 --- a/src/viadot/sources/azure_data_lake.py +++ b/src/viadot/sources/azure_data_lake.py @@ -1,8 +1,16 @@ -import os -from typing import Any, Dict, List +"""A module for working with Azure Data Lake (gen1 and gen2).""" + +from pathlib import Path +from typing import Any import pandas as pd -from adlfs import AzureBlobFileSystem, AzureDatalakeFileSystem + + +try: + from adlfs import AzureBlobFileSystem, AzureDatalakeFileSystem +except ModuleNotFoundError as e: + msg = "Missing required modules to use AzureDataLake source." + raise ImportError(msg) from e from viadot.config import get_source_credentials from viadot.exceptions import CredentialError @@ -11,59 +19,64 @@ class AzureDataLake(Source): - """ - A class for pulling data from the Azure Data Lakes (gen1 and gen2). + def __init__( + self, + path: str | None = None, + gen: int = 2, + credentials: dict[str, Any] | None = None, + config_key: str | None = None, + *args, + **kwargs, + ): + """A class for pulling data from the Azure Data Lakes (gen1 and gen2). + + You can either connect to the lake in general: + lake = AzureDataLake(); lake.exists("a/b/c.csv") - You can either connect to the lake in general - (`lake = AzureDataLake(); lake.exists("a/b/c.csv")`), - or to a particular path (`lake = AzureDataLake(path="a/b/c.csv"); lake.exists()`) + or to a particular path: + lake = AzureDataLake(path="a/b/c.csv"); lake.exists()` - Args: + Args: credentials (Dict[str, Any], optional): A dictionary containing the following credentials: `account_name`, `tenant_id`, `client_id`, and `client_secret`. config_key (str, optional): The key in the viadot config holding relevant credentials. 
- """ - - def __init__( - self, - path: str = None, - gen: int = 2, - credentials: Dict[str, Any] = None, - config_key: str = None, - *args, - **kwargs, - ): + """ credentials = credentials or get_source_credentials(config_key) + credentials = {key.lower(): value for key, value in credentials.items()} + required_credentials = ( "account_name", - "tenant_id", - "client_id", - "client_secret", + "azure_tenant_id", + "azure_client_id", + "azure_client_secret", ) required_credentials_are_provided = all( - [rc in credentials for rc in required_credentials] + rc in credentials for rc in required_credentials ) if credentials is None: - raise CredentialError("Please provide the credentials.") - elif not required_credentials_are_provided: - raise CredentialError("Please provide all required credentials.") + msg = "Please provide the credentials." + raise CredentialError(msg) + + if not required_credentials_are_provided: + msg = "Please provide all required credentials." + raise CredentialError(msg) super().__init__(*args, credentials=credentials, **kwargs) storage_account_name = self.credentials["account_name"] - tenant_id = self.credentials["tenant_id"] - client_id = self.credentials["client_id"] - client_secret = self.credentials["client_secret"] + tenant_id = self.credentials["azure_tenant_id"] + client_id = self.credentials["azure_client_id"] + client_secret = self.credentials["azure_client_secret"] self.path = path self.gen = gen self.storage_options = { - "tenant_id": tenant_id, - "client_id": client_id, - "client_secret": client_secret, + "azure_tenant_id": tenant_id, + "azure_client_id": client_id, + "azure_client_secret": client_secret, } if gen == 1: self.fs = AzureDatalakeFileSystem( @@ -73,7 +86,7 @@ def __init__( client_secret=client_secret, ) self.base_url = f"adl://{storage_account_name}" - elif gen == 2: + elif gen == 2: # noqa: PLR2004 self.storage_options["account_name"] = storage_account_name self.fs = AzureBlobFileSystem( account_name=storage_account_name, @@ -86,12 +99,11 @@ def __init__( def upload( self, from_path: str, - to_path: str = None, + to_path: str | None = None, recursive: bool = False, overwrite: bool = False, ) -> None: - """ - Upload file(s) to the lake. + """Upload file(s) to the lake. Args: from_path (str): Path to the local file(s) to be uploaded. @@ -107,11 +119,9 @@ def upload( lake.upload(from_path='tests/test.csv', to_path="sandbox/test.csv") ``` """ - if self.gen == 1: - raise NotImplementedError( - "Azure Data Lake Gen1 does not support simple file upload." - ) + msg = "Azure Data Lake Gen1 does not support simple file upload." + raise NotImplementedError(msg) to_path = to_path or self.path @@ -124,21 +134,19 @@ def upload( recursive=recursive, overwrite=overwrite, ) - except FileExistsError: - # Show a useful error message. + except FileExistsError as e: if recursive: - msg = f"At least one file in '{to_path}' already exists. Specify `overwrite=True` to overwrite." # noqa + msg = f"At least one file in '{to_path}' already exists. Specify `overwrite=True` to overwrite." else: msg = f"The file '{to_path}' already exists. Specify `overwrite=True` to overwrite." - raise FileExistsError(msg) + raise FileExistsError(msg) from e self.logger.info( f"Successfully uploaded file(s) from '{from_path}' to '{to_path}'." ) - def exists(self, path: str = None) -> bool: - """ - Check if a location exists in Azure Data Lake. + def exists(self, path: str | None = None) -> bool: + """Check if a location exists in Azure Data Lake. Args: path (str): The path to check. 
Can be a file or a directory. @@ -160,14 +168,24 @@ def exists(self, path: str = None) -> bool: def download( self, to_path: str, - from_path: str = None, + from_path: str | None = None, recursive: bool = False, overwrite: bool = True, ) -> None: + """Download file(s) from the lake. + + Args: + to_path (str): _description_ + from_path (str | None, optional): _description_. Defaults to None. + recursive (bool, optional): _description_. Defaults to False. + overwrite (bool, optional): _description_. Defaults to True. + + Raises: + NotImplementedError: _description_ + """ if overwrite is False: - raise NotImplementedError( - "Currently, only the default behavior (overwrite) is available." - ) + msg = "Currently, only the default behavior (overwrite) is available." + raise NotImplementedError(msg) from_path = from_path or self.path self.fs.download(rpath=from_path, lpath=to_path, recursive=recursive) @@ -175,17 +193,32 @@ def download( @add_viadot_metadata_columns def to_df( self, - path: str = None, + path: str | None = None, sep: str = "\t", quoting: int = 0, - lineterminator: str = None, - error_bad_lines: bool = None, - ): + lineterminator: str | None = None, + error_bad_lines: bool | None = None, + ) -> pd.DataFrame: + r"""Download a file from the lake and return it as a pandas DataFrame. + + Args: + path (str, optional): _description_. Defaults to None. + sep (str, optional): _description_. Defaults to "\t". + quoting (int, optional): _description_. Defaults to 0. + lineterminator (str, optional): _description_. Defaults to None. + error_bad_lines (bool, optional): _description_. Defaults to None. + + Raises: + ValueError: _description_ + + Returns: + _type_: _description_ + """ if quoting is None: quoting = 0 path = path or self.path - url = os.path.join(self.base_url, path) + url = self.base_url + "/" + path.strip("/") if url.endswith(".csv"): df = pd.read_csv( @@ -199,13 +232,13 @@ def to_df( elif url.endswith(".parquet"): df = pd.read_parquet(url, storage_options=self.storage_options) else: - raise ValueError("Only CSV and parquet formats are supported.") + msg = "Only CSV and parquet formats are supported." + raise ValueError(msg) return df - def ls(self, path: str = None) -> List[str]: - """ - Returns a list of files in a path. + def ls(self, path: str | None = None) -> list[str]: + """Returns a list of files in a path. Args: path (str, optional): Path to a folder. Defaults to None. @@ -213,9 +246,8 @@ def ls(self, path: str = None) -> List[str]: path = path or self.path return self.fs.ls(path) - def rm(self, path: str = None, recursive: bool = False): - """ - Deletes files in a path. + def rm(self, path: str | None = None, recursive: bool = False) -> None: + """Delete files in a path. Args: path (str, optional): Path to a folder. Defaults to None. @@ -225,34 +257,40 @@ def rm(self, path: str = None, recursive: bool = False): path = path or self.path self.fs.rm(path, recursive=recursive) - def cp(self, from_path: str = None, to_path: str = None, recursive: bool = False): - """ - Copies the contents of `from_path` to `to_path`. + def cp( + self, + from_path: str | None = None, + to_path: str | None = None, + recursive: bool = False, + ) -> None: + """Copies the contents of `from_path` to `to_path`. Args: - from_path (str, optional): Path from which to copy file(s). - Defauls to None. - to_path (str, optional): Path where to copy file(s). Defaults - to None. - recursive (bool, optional): Whether to copy file(s) recursively. - Defaults to False. 
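A short usage sketch for the read path shown above, assuming an `adls` viadot config key holding the credentials described earlier; the paths are placeholders.

```python
from viadot.sources import AzureDataLake

lake = AzureDataLake(config_key="adls", path="sandbox/test.csv")

# Read the CSV under `path` straight into a DataFrame (parquet works the same way).
df = lake.to_df(sep="\t")

# Or pull the raw file down for local processing.
lake.download(to_path="local/test.csv")
```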
+ from_path (str, optional): Path from which to copy file(s). Defaults to + None. + to_path (str, optional): Path where to copy file(s). Defaults to None. + recursive (bool, optional): Whether to copy file(s) recursively. Defaults to + False. """ from_path = from_path or self.path - to_path = to_path self.fs.cp(from_path, to_path, recursive=recursive) def from_df( - self, df: pd.DataFrame, path: str = None, overwrite: bool = False + self, + df: pd.DataFrame, + path: str | None = None, + sep: str = "\t", + overwrite: bool = False, ) -> None: - """ - Upload a pandas `DataFrame` to a file on Azure Data Lake. + r"""Upload a pandas `DataFrame` to a file on Azure Data Lake. Args: df (pd.DataFrame): The pandas `DataFrame` to upload. path (str, optional): The destination path. Defaults to None. + sep (str, optional): The separator to use in the `to_csv()` function. + Defaults to "\t". overwrite (bool): Whether to overwrite the file if it exist. """ - path = path or self.path extension = path.split(".")[-1] @@ -268,10 +306,10 @@ def from_df( # Can do it simply like this if ADLS accesses are set up correctly # url = os.path.join(self.base_url, path) # df.to_csv(url, storage_options=self.storage_options) - df.to_csv(file_name, index=False) + df.to_csv(file_name, index=False, sep=sep) else: df.to_parquet(file_name, index=False) self.upload(from_path=file_name, to_path=path, overwrite=overwrite) - os.remove(file_name) + Path(file_name).unlink() diff --git a/src/viadot/sources/base.py b/src/viadot/sources/base.py index 6605868af..05443a6fa 100644 --- a/src/viadot/sources/base.py +++ b/src/viadot/sources/base.py @@ -1,7 +1,9 @@ -import logging -import os +"""Base classes for data sources.""" + from abc import abstractmethod -from typing import Any, Dict, List, Literal, NoReturn, Tuple, Union +import logging +from pathlib import Path +from typing import Any, Literal import pandas as pd import pyarrow as pa @@ -10,55 +12,69 @@ from viadot.config import get_source_credentials from viadot.signals import SKIP + logger = logging.getLogger(__name__) -Record = Tuple[Any] +Record = tuple[Any] class Source: - def __init__(self, *args, credentials: Dict[str, Any] = None, **kwargs): + def __init__(self, *args, credentials: dict[str, Any] | None = None, **kwargs): # noqa: ARG002 + """Base class for data sources. + + Args: + credentials (dict[str, Any] | None, optional): The credentials for the + source. Defaults to None. + """ self.credentials = credentials self.data: pa.Table = None self.logger = logger @abstractmethod - def to_json(self): - pass + def to_json(self) -> dict: + """Download data from source to a dictionary.""" @abstractmethod - def to_df(self, if_empty: Literal["warn", "skip", "fail"] = "warn"): - pass + def to_df(self, if_empty: Literal["warn", "skip", "fail"] = "warn") -> pd.DataFrame: + """Download data from source to a pandas DataFrame. + + Args: + if_empty (Literal[warn, skip, fail], optional): What to do if there is no + data. Defaults to "warn". + + Returns: + pd.DataFrame: The data from the source as a pandas DataFrame. + """ @abstractmethod - def query(): + def query(self) -> list[Record] | bool: + """Run a query and possibly return the results.""" pass def to_arrow(self, if_empty: Literal["warn", "skip", "fail"] = "warn") -> pa.Table: - """ - Creates a pyarrow table from source. + """Creates a pyarrow table from source. Args: - if_empty (Literal["warn", "skip", "fail"], optional): : What to do if data sourse contains no data. Defaults to "warn". 
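And the write path: `from_df` now also accepts a separator for CSV targets. A hedged sketch with a placeholder config key and paths.

```python
import pandas as pd

from viadot.sources import AzureDataLake

lake = AzureDataLake(config_key="adls")

df = pd.DataFrame({"id": [1, 2], "name": ["Alice", "Bob"]})

# The extension of `path` decides the format: .csv honours `sep`, .parquet ignores it.
lake.from_df(df=df, path="sandbox/people.parquet", overwrite=True)
```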
+ if_empty (Literal["warn", "skip", "fail"], optional): : What to do if data + source contains no data. Defaults to "warn". """ - try: df = self.to_df(if_empty=if_empty) except SKIP: return False - table = pa.Table.from_pandas(df) - return table + return pa.Table.from_pandas(df) def to_csv( self, path: str, if_exists: Literal["append", "replace"] = "replace", if_empty: Literal["warn", "skip", "fail"] = "warn", - sep="\t", + sep: str = "\t", **kwargs, ) -> bool: - """ - Write from source to a CSV file. + r"""Write from source to a CSV file. + Note that the source can be a particular file or table, but also a database in general. Therefore, some sources may require additional parameters to pull the right resource. Hence this method @@ -66,10 +82,10 @@ def to_csv( Args: path (str): The destination path. - if_exists (Literal[, optional): What to do if the file exists. - Defaults to "replace". - if_empty (Literal["warn", "skip", "fail"], optional): What to do if the source contains no data. - Defaults to "warn". + if_exists (Literal[, optional): What to do if the file exists. Defaults to + "replace". + if_empty (Literal["warn", "skip", "fail"], optional): What to do if the + source contains no data. Defaults to "warn". sep (str, optional): The separator to use in the CSV. Defaults to "\t". Raises: @@ -78,7 +94,6 @@ def to_csv( Returns: bool: Whether the operation was successful. """ - try: df = self.to_df(if_empty=if_empty, **kwargs) except SKIP: @@ -89,11 +104,10 @@ def to_csv( elif if_exists == "replace": mode = "w" else: - raise ValueError("'if_exists' must be one of ['append', 'replace']") + msg = "'if_exists' must be one of ['append', 'replace']" + raise ValueError(msg) - df.to_csv( - path, sep=sep, mode=mode, index=False, header=not os.path.exists(path) - ) + df.to_csv(path, sep=sep, mode=mode, index=False, header=not Path(path).exists()) return True @@ -103,22 +117,23 @@ def to_excel( if_exists: str = "replace", if_empty: Literal["warn", "skip", "fail"] = "warn", ) -> bool: - """ - Write from source to a excel file. + """Write from source to a excel file. + Args: path (str): The destination path. - if_exists (str, optional): What to do if the file exists. Defaults to "replace". - if_empty (Literal["warn", "skip", "fail"], optional): What to do if the source contains no data. + if_exists (str, optional): What to do if the file exists. Defaults to + "replace". + if_empty (Literal["warn", "skip", "fail"], optional): What to do if the + source contains no data. """ - try: df = self.to_df(if_empty=if_empty) except SKIP: return False if if_exists == "append": - if os.path.isfile(path): + if Path(path).is_file(): excel_df = pd.read_excel(path) out_df = pd.concat([excel_df, df]) else: @@ -132,8 +147,8 @@ def _handle_if_empty( self, if_empty: Literal["warn", "skip", "fail"] = "warn", message: str = "The query produced no data.", - ) -> NoReturn: - """What to do if a fetch (database query, API request, etc.) produced no data.""" + ) -> None: + """What to do if a fetch (database query, API request) produced no data.""" if if_empty == "warn": self.logger.warning(message) elif if_empty == "skip": @@ -145,9 +160,9 @@ def _handle_if_empty( class SQL(Source): def __init__( self, - driver: str = None, - config_key: str = None, - credentials: str = None, + driver: str | None = None, + config_key: str | None = None, + credentials: str | None = None, query_timeout: int = 60 * 60, *args, **kwargs, @@ -156,19 +171,17 @@ def __init__( Args: driver (str, optional): The SQL driver to use. Defaults to None. 
- config_key (str, optional): The key inside local config containing the config. - User can choose to use this or pass credentials directly to the `credentials` - parameter. Defaults to None. - credentials (str, optional): Credentials for the connection. Defaults to None. - query_timeout (int, optional): The timeout for executed queries. Defaults to 1 hour. + config_key (str, optional): The key inside local config containing the + config. User can choose to use this or pass credentials directly to the + `credentials` parameter. Defaults to None. + credentials (str, optional): Credentials for the connection. Defaults to + None. + query_timeout (int, optional): The timeout for executed queries. Defaults to + 1 hour. """ - self.query_timeout = query_timeout - if config_key: - config_credentials = get_source_credentials(config_key) - else: - config_credentials = None + config_credentials = get_source_credentials(config_key) if config_key else None credentials = credentials or config_credentials or {} @@ -182,6 +195,7 @@ def __init__( @property def conn_str(self) -> str: """Generate a connection string from params or config. + Note that the user and password are escaped with '{}' characters. Returns: @@ -212,7 +226,16 @@ def con(self) -> pyodbc.Connection: self._con.timeout = self.query_timeout return self._con - def run(self, query: str) -> Union[List[Record], bool]: + def run(self, query: str) -> list[Record] | bool: + """Execute a query and return the result. + + Args: + query (str): The query to execute. + + Returns: + list[Record] | bool: If the query is a SELECT, return the result as a list + of records. + """ cursor = self.con.cursor() cursor.execute(query) @@ -230,14 +253,16 @@ def run(self, query: str) -> Union[List[Record], bool]: def to_df( self, query: str, - con: pyodbc.Connection = None, + con: pyodbc.Connection | None = None, if_empty: Literal["warn", "skip", "fail"] = "warn", ) -> pd.DataFrame: - """Creates DataFrame form SQL query. + """Execute a query and return the result as a pandas DataFrame. + Args: - query (str): SQL query. If don't start with "SELECT" returns empty DataFrame. + query (str): The query to execute. con (pyodbc.Connection, optional): The connection to use to pull the data. - if_empty (Literal["warn", "skip", "fail"], optional): What to do if the query returns no data. Defaults to None. + if_empty (Literal["warn", "skip", "fail"], optional): What to do if the + query returns no data. Defaults to None. """ conn = con or self.con @@ -250,21 +275,21 @@ def to_df( df = pd.DataFrame() return df - def _check_if_table_exists(self, table: str, schema: str = None) -> bool: - """Checks if table exists. + def _check_if_table_exists(self, table: str, schema: str | None = None) -> bool: + """Check if table exists in a specified schema. + Args: table (str): Table name. schema (str, optional): Schema name. Defaults to None. """ - exists_query = f"SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME='{table}'" - exists = bool(self.run(exists_query)) - return exists + exists_query = f"SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME='{table}'" # noqa: S608 + return bool(self.run(exists_query)) def create_table( self, table: str, - schema: str = None, - dtypes: Dict[str, Any] = None, + schema: str | None = None, + dtypes: dict[str, Any] | None = None, if_exists: Literal["fail", "replace", "skip", "delete"] = "fail", ) -> bool: """Create a table. 
@@ -272,8 +297,10 @@ def create_table( Args: table (str): The destination table. Defaults to None. schema (str, optional): The destination schema. Defaults to None. - dtypes (Dict[str, Any], optional): The data types to use for the table. Defaults to None. - if_exists (Literal, optional): What to do if the table already exists. Defaults to "fail". + dtypes (Dict[str, Any], optional): The data types to use for the table. + Defaults to None. + if_exists (Literal, optional): What to do if the table already exists. + Defaults to "fail". Returns: bool: Whether the operation was successful. @@ -285,12 +312,13 @@ def create_table( if if_exists == "replace": self.run(f"DROP TABLE {fqn}") elif if_exists == "delete": - self.run(f"DELETE FROM {fqn}") + self.run(f"DELETE FROM {fqn}") # noqa: S608 return True elif if_exists == "fail": - raise ValueError( - "The table already exists and 'if_exists' is set to 'fail'." + msg = ( + f"The table {fqn} already exists and 'if_exists' is set to 'fail'." ) + raise ValueError(msg) elif if_exists == "skip": return False @@ -304,8 +332,7 @@ def create_table( return True def insert_into(self, table: str, df: pd.DataFrame) -> str: - """Insert values from a pandas DataFrame into an existing - database table. + """Insert values from a pandas DataFrame into an existing database table. Args: table (str): table name @@ -314,13 +341,12 @@ def insert_into(self, table: str, df: pd.DataFrame) -> str: Returns: str: The executed SQL insert query. """ - values = "" rows_count = df.shape[0] counter = 0 for row in df.values: - counter += 1 - out_row = ", ".join(map(self._sql_column, row)) + counter += 1 # noqa: SIM113 + out_row = ", ".join(map(self._escape_column_name, row)) comma = ",\n" if counter == rows_count: comma = ";" @@ -329,15 +355,11 @@ def insert_into(self, table: str, df: pd.DataFrame) -> str: columns = ", ".join(df.columns) - sql = f"INSERT INTO {table} ({columns})\n VALUES {values}" + sql = f"INSERT INTO {table} ({columns})\n VALUES {values}" # noqa: S608 self.run(sql) return sql - def _sql_column(self, column_name: str) -> str: - """Returns the name of a column""" - if isinstance(column_name, str): - out_name = f"'{column_name}'" - else: - out_name = str(column_name) - return out_name + def _escape_column_name(self, column_name: str) -> str: + """Return an escaped column name.""" + return f"'{column_name}'" diff --git a/src/viadot/sources/cloud_for_customers.py b/src/viadot/sources/cloud_for_customers.py index 6843c4842..57ae97cac 100644 --- a/src/viadot/sources/cloud_for_customers.py +++ b/src/viadot/sources/cloud_for_customers.py @@ -1,11 +1,13 @@ -import re +"""A connector for Cloud For Customers API.""" + from copy import deepcopy -from typing import Any, Dict, List, Optional +import re +from typing import Any, Literal from urllib.parse import urljoin import pandas as pd -import requests from pydantic import BaseModel, SecretStr, root_validator +import requests from viadot.config import get_source_credentials from viadot.exceptions import CredentialError @@ -14,49 +16,86 @@ class CloudForCustomersCredentials(BaseModel): + """Cloud for Customers connector credentials validator. + + Validate the credentials. + + Methods: + is_configured: main method to validate. + """ + username: str # eg. username@{tenant_name}.com password: SecretStr - url: Optional[str] = None # The URL to extract records from. - report_url: Optional[str] = None # The URL of a prepared report. + url: str | None = None # The URL to extract records from. 
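A hedged end-to-end sketch of the `create_table` / `insert_into` pair via `SQLServer` (whose own module is not shown in this hunk). The credential key names follow the `DatabaseCredentials` mapping earlier in this patch and are assumptions, as are the schema and table names.

```python
import pandas as pd

from viadot.sources import SQLServer

# Assumed credential keys; adjust to whatever the SQLServer source expects.
sql_server = SQLServer(
    credentials={
        "server": "localhost,1433",
        "db_name": "sandbox_db",
        "user": "viadot",
        "password": "***",
    }
)

sql_server.create_table(
    schema="sandbox",
    table="people",
    dtypes={"id": "INT", "name": "VARCHAR(100)"},
    if_exists="replace",
)

df = pd.DataFrame({"id": [1, 2], "name": ["Alice", "Bob"]})
sql_server.insert_into(table="sandbox.people", df=df)
```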
+ report_url: str | None = None # The URL of a prepared report. + @classmethod @root_validator(pre=True) - def is_configured(cls, credentials): + def is_configured(cls, credentials: dict) -> dict: + """Validate Credentials. + + Args: + credentials (dict): dictionary with user and password. + + Returns: + credentials (dict): dictionary with user and password. + """ username = credentials.get("username") password = credentials.get("password") if not (username and password): - raise CredentialError("`username` and `password` credentials are required.") + msg = "`username` and `password` credentials are required." + raise CredentialError(msg) return credentials class CloudForCustomers(Source): - """Cloud for Customers connector to fetch Odata source. + """Cloud for Customers connector to fetch OData source. Args: - url (str, optional): The URL to the C4C API. E.g 'https://myNNNNNN.crm.ondemand.com/c4c/v1/'. + url (str, optional): The URL to the C4C API. For example, + 'https://myNNNNNN.crm.ondemand.com/c4c/v1/'. endpoint (str, optional): The API endpoint. report_url (str, optional): The URL of a prepared report. - filter_params (Dict[str, Any], optional): Filtering parameters passed to the request. E.g {"$filter": "AccountID eq '1234'"}. - More info on: https://userapps.support.sap.com/sap/support/knowledge/en/2330688 - credentials (CloudForCustomersCredentials, optional): Cloud for Customers credentials. - config_key (str, optional): The key in the viadot config holding relevant credentials. + filter_params (Dict[str, Any], optional): Filtering parameters passed to the + request. E.g {"$filter": "AccountID eq '1234'"}. More info on: + https://userapps.support.sap.com/sap/support/knowledge/en/2330688 + credentials (CloudForCustomersCredentials, optional): Cloud for Customers + credentials. + config_key (str, optional): The key in the viadot config holding relevant + credentials. """ - DEFAULT_PARAMS = {"$format": "json"} + DEFAULT_PARAMS = {"$format": "json"} # noqa: RUF012 def __init__( self, - url: str = None, - endpoint: str = None, - report_url: str = None, - filter_params: Dict[str, Any] = None, - credentials: CloudForCustomersCredentials = None, - config_key: Optional[str] = None, *args, + url: str | None = None, + endpoint: str | None = None, + report_url: str | None = None, + filter_params: dict[str, Any] | None = None, + credentials: CloudForCustomersCredentials | None = None, + config_key: str | None = None, **kwargs, ): - ## Credentials logic + """Initialize the class with the provided parameters. + + Args: + *args: Variable length argument list. + url (str, optional): The base URL for the service. + endpoint (str, optional): The specific endpoint for the service. + report_url (str, optional): The URL for the report. + filter_params (Dict[str, Any], optional): Parameters to filter the report + data. + credentials (CloudForCustomersCredentials, optional): Credentials required + for authentication. + config_key (Optional[str], optional): A key to retrieve specific + configuration settings. + **kwargs: Arbitrary keyword arguments. 
+ """ + # Credentials logic raw_creds = credentials or get_source_credentials(config_key) or {} validated_creds = dict( CloudForCustomersCredentials(**raw_creds) @@ -71,7 +110,6 @@ def __init__( if self.url: self.full_url = urljoin(self.url, self.endpoint) - if filter_params: filter_params_merged = self.DEFAULT_PARAMS.copy() filter_params_merged.update(filter_params) @@ -82,7 +120,7 @@ def __init__( @staticmethod def create_metadata_url(url: str) -> str: - """Creates URL to fetch metadata from. + """Create URL to fetch metadata from. Args: url (str): The URL to transform to metadata URL. @@ -93,11 +131,10 @@ def create_metadata_url(url: str) -> str: start = url.split(".svc")[0] url_raw = url.split("?")[0] end = url_raw.split("/")[-1] - meta_url = start + ".svc/$metadata?entityset=" + end - return meta_url + return start + ".svc/$metadata?entityset=" + end - def _extract_records_from_report_url(self, report_url: str) -> List[Dict[str, Any]]: - """Fetches report_url to extract records. + def _extract_records_from_report_url(self, report_url: str) -> list[dict[str, Any]]: + """Fetch report_url to extract records. Args: report_url (str): The url to extract records from. @@ -116,8 +153,8 @@ def _extract_records_from_report_url(self, report_url: str) -> List[Dict[str, An return records - def _extract_records_from_url(self, url: str) -> List[Dict[str, Any]]: - """Fetches URL to extract records. + def _extract_records_from_url(self, url: str) -> list[dict[str, Any]]: + """Fetch URL to extract records. Args: url (str): The URL to extract records from. @@ -140,7 +177,8 @@ def _extract_records_from_url(self, url: str) -> List[Dict[str, Any]]: new_records = response_json["d"] url = response_json.get("__next", None) - # prevents concatenation of previous urls with filter_params with the same filter_params + # prevents concatenation of previous urls with filter_params with the same + # filter_params tmp_filter_params = None tmp_full_url = url @@ -149,27 +187,26 @@ def _extract_records_from_url(self, url: str) -> List[Dict[str, Any]]: return records def extract_records( - self, url: Optional[str], report_url: Optional[str] - ) -> List[Dict[str, Any]]: - """Downloads records from `url` or `report_url` if present. + self, url: str | None = None, report_url: str | None = None + ) -> list[dict[str, Any]]: + """Download records from `url` or `report_url` if present. Returns: records (List[Dict[str, Any]]): The records extracted from URL. """ if self.is_report: - return self._extract_records_from_report_url(url=report_url) - else: - if url: - full_url = urljoin(url, self.endpoint) - else: - full_url = self.full_url - return self._extract_records_from_url(url=full_url) + return self._extract_records_from_report_url(report_url=report_url) + full_url = urljoin(url, self.endpoint) if url else self.full_url + return self._extract_records_from_url(url=full_url) def get_entities( - self, dirty_json: Dict[str, Any], url: str - ) -> List[Dict[str, Any]]: - """Extracts entities from request.json(). Entities represents objects that store information. - More info on: https://help.sap.com/docs/EAD_HANA/0e60f05842fd41078917822867220c78/0bd1db568fa546d6823d4c19a6b609ab.html + self, dirty_json: dict[str, Any], url: str + ) -> list[dict[str, Any]]: + """Extract entities from request.json(). + + Entities represent objects that store information. 
More info on: + https://help.sap.com/docs/EAD_HANA/0e60f05842fd41078917822867220c78/ + 0bd1db568fa546d6823d4c19a6b609ab.html Args: dirty_json (Dict[str, Any]): request.json() dict from response to API. @@ -178,27 +215,29 @@ def get_entities( Returns: entities (List[Dict[str, Any]]): list filled with entities. """ - metadata_url = self.create_metadata_url(url) column_maper_dict = self.get_property_to_sap_label_dict(metadata_url) entities = [] for element in dirty_json["d"]["results"]: new_entity = {} for key, object_of_interest in element.items(): - if key not in ["__metadata", "Photo", "", "Picture"]: - if "{" not in str(object_of_interest): - new_key = column_maper_dict.get(key) - if new_key: - new_entity[new_key] = object_of_interest - else: - new_entity[key] = object_of_interest + if key not in ["__metadata", "Photo", "", "Picture"] and "{" not in str( + object_of_interest + ): + new_key = column_maper_dict.get(key) + if new_key: + new_entity[new_key] = object_of_interest + else: + new_entity[key] = object_of_interest entities.append(new_entity) return entities - def get_property_to_sap_label_dict(self, url: str = None) -> Dict[str, str]: - """Creates Dict that maps Property Name to value of SAP label. + def get_property_to_sap_label_dict(self, url: str | None = None) -> dict[str, str]: + """Create Dict that maps Property Name to value of SAP label. + Property: Properties define the characteristics of the data. - SAP label: Labels are used for identification and for provision of content information. + SAP label: Labels are used for identification and for provision of content + information. Args: url (str, optional): The URL to fetch metadata from. @@ -206,12 +245,13 @@ def get_property_to_sap_label_dict(self, url: str = None) -> Dict[str, str]: Returns: Dict[str, str]: Property Name to value of SAP label. """ - column_mapping = {} if url: username = self.credentials.get("username") - pw = self.credentials.get("password") - response = requests.get(url, auth=(username, pw)) + password = self.credentials.get("password") + response = requests.get( + url, auth=(username, password), timeout=(3.05, 60 * 5) + ) for sentence in response.text.split("/>"): result = re.search( r'(?<=Name=")([^"]+).+(sap:label=")([^"]+)+', sentence @@ -225,51 +265,58 @@ def get_property_to_sap_label_dict(self, url: str = None) -> Dict[str, str]: def get_response( self, url: str, - filter_params: Dict[str, Any] = None, + filter_params: dict[str, Any] | None = None, timeout: tuple = (3.05, 60 * 30), ) -> requests.models.Response: - """Handles requests. + """Handle requests. Args: url (str): The url to request to. - filter_params (Dict[str, Any], optional): Additional parameters like filter, used in case of normal url. + filter_params (Dict[str, Any], optional): Additional parameters like filter, + used in case of normal url. timeout (tuple, optional): The request time-out. Default is (3.05, 60 * 30). Returns: requests.models.Response. """ username = self.credentials.get("username") - pw = self.credentials.get("password") - response = handle_api_response( + password = self.credentials.get("password") + return handle_api_response( url=url, params=filter_params, - auth=(username, pw), + auth=(username, password), timeout=timeout, ) - return response def to_df( self, - url: str = None, - fields: List[str] = None, - dtype: dict = None, - tests: dict = None, + if_empty: Literal["warn", "skip", "fail"] = "warn", **kwargs, ) -> pd.DataFrame: """Download a table or report into a pandas DataFrame. 
Args: - url (str): The URL to extract records from. - fields (List[str], optional): List of fields to put in DataFrame. - dtype (dict, optional): The dtypes to use in the DataFrame. - tests (Dict[str], optional): A dictionary with optional list of tests - to verify the output dataframe. If defined, triggers the `validate` - function from utils. Defaults to None. kwargs: The parameters to pass to DataFrame constructor. Returns: df (pandas.DataFrame): DataFrame containing the records. """ + # Your implementation here + if if_empty == "warn": + self.logger.info("Warning: DataFrame is empty.") + elif if_empty == "skip": + self.logger.info("Skipping due to empty DataFrame.") + elif if_empty == "fail": + self.logger.info("Failing due to empty DataFrame.") + else: + msg = "Invalid value for if_empty parameter." + raise ValueError(msg) + + url: str = kwargs.get("url", "") + fields: list[str] = kwargs.get("fields", []) + dtype: dict[str, Any] = kwargs.get("dtype", {}) + tests: dict[str, Any] = kwargs.get("tests", {}) + url = url or self.url records = self.extract_records(url=url) df = pd.DataFrame(data=records, **kwargs) diff --git a/src/viadot/sources/databricks.py b/src/viadot/sources/databricks.py index ec8f50687..10a01217c 100644 --- a/src/viadot/sources/databricks.py +++ b/src/viadot/sources/databricks.py @@ -1,19 +1,27 @@ +"""Databricks connector.""" + import json -import os -from typing import Literal, Optional, Union +from pathlib import Path +from typing import Literal, Union import pandas as pd + try: - import pyspark.sql.dataframe as spark from delta.tables import * # noqa -except ModuleNotFoundError: - raise ImportError("pyspark.sql.dataframe is required to use Databricks source.") + import pyspark.sql.dataframe as spark +except ModuleNotFoundError as e: + msg = "Missing required modules to use Databricks source." + raise ImportError(msg) from e from pydantic import BaseModel, root_validator from viadot.config import get_source_credentials -from viadot.exceptions import CredentialError, TableAlreadyExists, TableDoesNotExist +from viadot.exceptions import ( + CredentialError, + TableAlreadyExistsError, + TableDoesNotExistError, +) from viadot.sources.base import Source from viadot.utils import ( _cast_df_cols, @@ -27,48 +35,45 @@ class DatabricksCredentials(BaseModel): host: str # The host address of the Databricks cluster. port: str = "15001" # The port on which the cluster is exposed. By default '15001'. For Spark Connect, use the port 443 by default. cluster_id: str # The ID of the Databricks cluster to which to connect. - org_id: Optional[ - str - ] # The ID of the Databricks organization to which the cluster belongs. Not required when using Spark Connect. + org_id: ( + str | None + ) # The ID of the Databricks organization to which the cluster belongs. Not required when using Spark Connect. token: str # The access token which will be used to connect to the cluster. @root_validator(pre=True) - def is_configured(cls, credentials): + def is_configured(cls, credentials: dict) -> dict: # noqa: N805, D102 host = credentials.get("host") cluster_id = credentials.get("cluster_id") token = credentials.get("token") if not (host and cluster_id and token): - raise CredentialError( - "Databricks credentials are not configured correctly." - ) + mgs = "Databricks credentials are not configured correctly." + raise CredentialError(mgs) return credentials class Databricks(Source): - """ - A class for pulling and manipulating data on Databricks. 
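A hedged usage sketch for the connector; the tenant URL (mirroring the docstring's placeholder), endpoint, filter, and config key are all placeholders.

```python
from viadot.sources import CloudForCustomers

c4c = CloudForCustomers(
    url="https://myNNNNNN.crm.ondemand.com/c4c/v1/",
    endpoint="ContactCollection",
    filter_params={"$filter": "AccountID eq '1234'"},
    config_key="cloud_for_customers",
)

df = c4c.to_df()
print(df.head())
```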
- - Documentation for Databricks is located at: - https://docs.microsoft.com/en-us/azure/databricks/ - - Parameters - ---------- - credentials : DatabricksCredentials, optional - Databricks connection configuration. - config_key (str, optional): The key in the viadot config holding relevant - credentials. - """ - DEFAULT_SCHEMA = "default" def __init__( self, - credentials: DatabricksCredentials = None, - config_key: str = None, + credentials: DatabricksCredentials | None = None, + config_key: str | None = None, *args, **kwargs, ): + """A class for pulling and manipulating data on Databricks. + + Documentation for Databricks is located at: + https://docs.microsoft.com/en-us/azure/databricks/ + + Parameters + ---------- + credentials : DatabricksCredentials, optional + Databricks connection configuration. + config_key (str, optional): The key in the viadot config holding relevant + credentials. + """ raw_creds = credentials or get_source_credentials(config_key) validated_creds = dict( DatabricksCredentials(**raw_creds) @@ -78,10 +83,10 @@ def __init__( self._session = None - def __enter__(self): + def __enter__(self): # noqa: D105 return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, traceback): # noqa: D105, ANN001 if self._session: self._session.stop() self._session = None @@ -95,30 +100,26 @@ def session(self) -> Union[SparkSession, "DatabricksSession"]: # noqa return self._session def _create_spark_session(self): - """ - Establish a connection to the Databricks cluster. + """Establish a connection to the Databricks cluster. Returns: SparkSession: A configured SparkSession object. """ - - db_connect_config = dict( - host=self.credentials.get("host"), - token=self.credentials.get("token"), - cluster_id=self.credentials.get("cluster_id"), - org_id=self.credentials.get("org_id"), - port=self.credentials.get("port"), - ) - - with open(os.path.expanduser("~/.databricks-connect"), "w") as f: + db_connect_config = { + "host": self.credentials.get("host"), + "token": self.credentials.get("token"), + "cluster_id": self.credentials.get("cluster_id"), + "org_id": self.credentials.get("org_id"), + "port": self.credentials.get("port"), + } + + with Path.open(Path.expanduser("~/.databricks-connect"), "w") as f: json.dump(db_connect_config, f) - spark = SparkSession.builder.getOrCreate() # noqa - return spark + return SparkSession.builder.getOrCreate() # noqa def _create_spark_connect_session(self): - """ - Establish a connection to a Databricks cluster. + """Establish a connection to a Databricks cluster. Returns: SparkSession: A configured SparkSession object. @@ -131,9 +132,8 @@ def _create_spark_connect_session(self): cluster_id = self.credentials.get("cluster_id") conn_str = f"sc://{workspace_instance_name}:{port}/;token={token};x-databricks-cluster-id={cluster_id}" - spark = DatabricksSession.builder.remote(conn_str).getOrCreate() - return spark + return DatabricksSession.builder.remote(conn_str).getOrCreate() @add_viadot_metadata_columns def to_df( @@ -141,8 +141,7 @@ def to_df( query: str, if_empty: Literal["warn", "skip", "fail"] = "warn", ) -> pd.DataFrame: - """ - Execute a query and return a Pandas DataFrame. + """Execute a query and return a Pandas DataFrame. Args: query (str): The query to execute @@ -168,11 +167,11 @@ def to_df( return df def _pandas_df_to_spark_df(self, df: pd.DataFrame) -> spark.DataFrame: - """ - Convert a Pandas DataFrame to a Spark DataFrame. + """Convert a Pandas DataFrame to a Spark DataFrame. 
Args: - df (pd.DataFrame): The Pandas DataFrame to be converted to a Spark DataFrame. + df (pd.DataFrame): The Pandas DataFrame to be converted to a Spark + DataFrame. Example: ```python @@ -188,15 +187,14 @@ def _pandas_df_to_spark_df(self, df: pd.DataFrame) -> spark.DataFrame: Returns: spark.DataFrame: The resulting Spark DataFrame. """ - spark_df = self.session.createDataFrame(df) - return spark_df + return self.session.createDataFrame(df) def _spark_df_to_pandas_df(self, spark_df: spark.DataFrame) -> pd.DataFrame: - """ - Convert a Spark DataFrame to a Pandas DataFrame. + """Convert a Spark DataFrame to a Pandas DataFrame. Args: - df (spark.DataFrame): The Spark DataFrame to be converted to a Pandas DataFrame. + df (spark.DataFrame): The Spark DataFrame to be converted to a Pandas + DataFrame. Example: ```python @@ -216,14 +214,14 @@ def _spark_df_to_pandas_df(self, spark_df: spark.DataFrame) -> pd.DataFrame: def run( self, query: str, fetch_type: Literal["spark", "pandas"] = "spark" - ) -> Union[spark.DataFrame, pd.DataFrame, bool]: - """ - Execute an SQL query. + ) -> spark.DataFrame | pd.DataFrame | bool: + """Execute an SQL query. Args: query (str): The query to execute. fetch_type (Literal, optional): How to return the data: either - in the default Spark DataFrame format or as a Pandas DataFrame. Defaults to "spark". + in the default Spark DataFrame format or as a Pandas DataFrame. Defaults + to "spark". Example: ```python @@ -233,14 +231,13 @@ def run( query_result = databricks.run("SELECT * FROM schema.table_1") ``` Returns: - Union[spark.DataFrame, pd.DataFrame, bool]: Either the result set of a query or, - in case of DDL/DML queries, a boolean describing whether - the query was executed successfully. + Union[spark.DataFrame, pd.DataFrame, bool]: Either the result set of a query + or, in case of DDL/DML queries, a boolean describing whether the query was + executed successfully. """ if fetch_type not in ["spark", "pandas"]: - raise ValueError( - "Only the values 'spark', 'pandas' are allowed for 'fetch_type'" - ) + msg = "Only the values 'spark', 'pandas' are allowed for 'fetch_type'" + raise ValueError(msg) query_clean = query.upper().strip() query_keywords = ["SELECT", "SHOW", "PRAGMA", "DESCRIBE"] @@ -257,7 +254,7 @@ def run( return result - def _check_if_table_exists(self, table: str, schema: str = None) -> bool: + def _check_if_table_exists(self, table: str, schema: str | None = None) -> bool: if schema is None: schema = Databricks.DEFAULT_SCHEMA return self.session.catalog.tableExists(dbName=schema, tableName=table) @@ -269,14 +266,13 @@ def create_table_from_pandas( self, df: pd.DataFrame, table: str, - schema: str = None, + schema: str | None = None, if_empty: Literal["warn", "skip", "fail"] = "warn", if_exists: Literal["replace", "skip", "fail"] = "fail", snakecase_column_names: bool = True, cast_df_columns: bool = True, ) -> bool: - """ - Create a table using a pandas `DataFrame`. + """Create a table using a pandas `DataFrame`. Args: df (pd.DataFrame): The `DataFrame` to be written as a table. @@ -286,10 +282,11 @@ def create_table_from_pandas( Defaults to 'warn'. if_exists (Literal, optional): What to do if the table already exists. Defaults to 'fail'. - snakecase_column_names (bool, optional): Whether to convert column names to snake case. - Defaults to True. - cast_df_columns (bool, optional): Converts column types in DataFrame using utils._cast_df_cols(). - This param exists because of possible errors with object cols. Defaults to True. 
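Expanding the docstring example above into a complete snippet; it requires the Databricks extra, and `databricks_dev`, the schema, and the table are placeholders.

```python
import pandas as pd

from viadot.sources import Databricks

databricks = Databricks(config_key="databricks_dev")

df = pd.DataFrame([{"id": "1", "name": "Joe"}])

created = databricks.create_table_from_pandas(
    df=df,
    schema="viadot_test",
    table="test",
    if_exists="replace",
)
print(created)
```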
+ snakecase_column_names (bool, optional): Whether to convert column names to + snake case. Defaults to True. + cast_df_columns (bool, optional): Converts column types in DataFrame using + utils._cast_df_cols(). This param exists because of possible errors with + object cols. Defaults to True. Example: ```python @@ -306,7 +303,6 @@ def create_table_from_pandas( Returns: bool: True if the table was created successfully, False otherwise. """ - if df.empty: self._handle_if_empty(if_empty) @@ -327,7 +323,7 @@ def create_table_from_pandas( self.logger.warning(f"Table {fqn} already exists.") result = False elif if_exists == "fail": - raise TableAlreadyExists(fqn) + raise TableAlreadyExistsError(fqn) elif if_exists == "replace": result = self._full_refresh(schema=schema, table=table, df=df) else: @@ -338,7 +334,7 @@ def create_table_from_pandas( sdf.createOrReplaceTempView("tmp_view") result = self.run( - f"CREATE TABLE {fqn} USING DELTA AS SELECT * FROM tmp_view;" + f"CREATE TABLE {fqn} USING DELTA AS SELECT * FROM tmp_view;" # noqa: S608 ) if result: @@ -346,16 +342,15 @@ def create_table_from_pandas( return result - def drop_table(self, table: str, schema: str = None) -> bool: - """ - Delete an existing table. + def drop_table(self, table: str, schema: str | None = None) -> bool: + """Delete an existing table. Args: schema (str): Name of the schema. table (str): Name of the new table to be created. Raises: - TableDoesNotExist: If the table does not exist. + TableDoesNotExistError: If the table does not exist. Example: ```python @@ -375,7 +370,7 @@ def drop_table(self, table: str, schema: str = None) -> bool: result = self.run(f"DROP TABLE {fqn}") self.logger.info(f"Table {fqn} has been deleted successfully.") else: - raise TableDoesNotExist(fqn=fqn) + raise TableDoesNotExistError(fqn=fqn) return result @@ -387,8 +382,7 @@ def _append(self, schema: str, table: str, df: pd.DataFrame): self.logger.info(f"Table {fqn} has been appended successfully.") def _full_refresh(self, schema: str, table: str, df: pd.DataFrame) -> bool: - """ - Overwrite an existing table with data from a Pandas DataFrame. + """Overwrite an existing table with data from a Pandas DataFrame. Args: schema (str): Name of the schema. @@ -403,7 +397,9 @@ def _full_refresh(self, schema: str, table: str, df: pd.DataFrame) -> bool: list = [{"id":"1", "name":"Joe"}] df = pd.DataFrame(list) - databricks.insert_into( df=df, schema="viadot_test", table="test", mode="replace") + databricks.insert_into( + df=df, schema="viadot_test", table="test", mode="replace" + ) ``` Returns: bool: True if the table has been refreshed successfully, False otherwise. @@ -423,7 +419,7 @@ def _upsert( df: pd.DataFrame, table: str, primary_key: str, - schema: str = None, + schema: str | None = None, ): spark_df = self._pandas_df_to_spark_df(df) merge_query = build_merge_query( @@ -443,21 +439,21 @@ def insert_into( self, df: pd.DataFrame, table: str, - schema: str = None, - primary_key: str = None, + schema: str | None = None, + primary_key: str | None = None, mode: Literal["replace", "append", "update"] = "append", ) -> None: - """ - Insert data from a pandas `DataFrame` into a Delta table. + """Insert data from a pandas `DataFrame` into a Delta table. Args: df (pd.DataFrame): DataFrame with the data to be inserted into the table. table (str): Name of the new table to be created. schema (str, Optional): Name of the schema. - primary_key (str, Optional): The primary key on which the data will be joined. 
+ primary_key (str, Optional): The primary key on which the data will be + joined. Required only when updating existing data. - mode (str, Optional): Which operation to run with the data. Allowed operations - are: 'replace', 'append', and 'update'. By default, 'append'. + mode (str, Optional): Which operation to run with the data. Allowed + operations are: 'replace', 'append', and 'update'. By default, 'append'. Example: ```python @@ -486,15 +482,14 @@ def insert_into( elif mode == "update": self._upsert(df=df, schema=schema, table=table, primary_key=primary_key) else: - raise ValueError( - "`mode` must be one of: 'replace', 'append', or 'update'." - ) + msg = "`mode` must be one of: 'replace', 'append', or 'update'." + raise ValueError(msg) else: - raise ValueError(f"Table {fqn} does not exist.") + msg = f"Table {fqn} does not exist." + raise ValueError(msg) def create_schema(self, schema_name: str) -> bool: - """ - Create a schema for storing tables. + """Create a schema for storing tables. Args: schema_name (str): Name of the new schema to be created. @@ -513,8 +508,7 @@ def create_schema(self, schema_name: str) -> bool: return result def drop_schema(self, schema_name: str) -> bool: - """ - Delete a schema. + """Delete a schema. Args: schema_name (str): Name of the schema to be deleted. @@ -532,9 +526,8 @@ def drop_schema(self, schema_name: str) -> bool: self.logger.info(f"Schema {schema_name} deleted.") return result - def discover_schema(self, table: str, schema: str = None) -> dict: - """ - Return a table's schema. + def discover_schema(self, table: str, schema: str | None = None) -> dict: + """Return a table's schema. Args: schema (str): Name of the schema. @@ -564,13 +557,10 @@ def discover_schema(self, table: str, schema: str = None) -> dict: data_types = result["data_type"].values.tolist() data_types = data_types[:-3] - schema = dict(zip(col_names, data_types)) + return dict(zip(col_names, data_types, strict=False)) - return schema - - def get_table_version(self, table: str, schema: str = None) -> int: - """ - Get the provided table's version number. + def get_table_version(self, table: str, schema: str | None = None) -> int: + """Get the provided table's version number. Args: schema (str): Name of the schema. @@ -592,9 +582,10 @@ def get_table_version(self, table: str, schema: str = None) -> int: version_number = history["version"].iat[0] return int(version_number) - def rollback(self, table: str, version_number: int, schema: str = None) -> bool: - """ - Rollback a table to a previous version. + def rollback( + self, table: str, version_number: int, schema: str | None = None + ) -> bool: + """Rollback a table to a previous version. Args: schema (str): Name of the schema. @@ -621,14 +612,13 @@ def rollback(self, table: str, version_number: int, schema: str = None) -> bool: Returns: result (bool): A boolean indicating the success of the rollback. 
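A minimal usage sketch of the rollback flow, assuming a configured `databricks` config key; the schema and table names below are placeholders:

```python
# Hypothetical sketch: read the current Delta table version, then restore the previous one.
databricks = Databricks(config_key="databricks")

current_version = databricks.get_table_version(schema="viadot_test", table="test")
databricks.rollback(
    schema="viadot_test",
    table="test",
    version_number=current_version - 1,
)
```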
""" - if schema is None: schema = Databricks.DEFAULT_SCHEMA fqn = f"{schema}.{table}" # Retrieve the data from the previous table - old_table = self.to_df(f"SELECT * FROM {fqn}@v{version_number}") + old_table = self.to_df(f"SELECT * FROM {fqn}@v{version_number}") # noqa: S608 # Perform full-refresh and overwrite the table with the new data result = self.insert_into( diff --git a/src/viadot/sources/duckdb.py b/src/viadot/sources/duckdb.py new file mode 100644 index 000000000..607d1f82a --- /dev/null +++ b/src/viadot/sources/duckdb.py @@ -0,0 +1,258 @@ +"""A module for interacting with DuckDB.""" + +import re +from typing import Literal + +import duckdb +import pandas as pd +from pydantic import BaseModel + +from viadot.config import get_source_credentials +from viadot.exceptions import CredentialError +from viadot.signals import SKIP +from viadot.sources.base import Record, Source + + +class DuckDBCredentials(BaseModel): + database: str + read_only: bool = True + + +class DuckDB(Source): + DEFAULT_SCHEMA = "main" + + def __init__( + self, + config_key: str | None = None, + credentials: DuckDBCredentials | None = None, + *args, + **kwargs, + ): + """A class for interacting with DuckDB. + + Args: + config_key (str, optional): The key inside local config containing the + credentials. + credentials (DuckDBCredentials, optional): Credentials for the connection. + Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + """ + raw_creds = credentials or get_source_credentials(config_key) or {} + if credentials is None: + msg = "Please specify the credentials." + raise CredentialError(msg) + validated_creds = dict( + DuckDBCredentials(**raw_creds) + ) # validate the credentials + super().__init__(*args, credentials=validated_creds, **kwargs) + + @property + def con(self) -> duckdb.DuckDBPyConnection: + """Return a new connection to the database. + + As the views are highly isolated, we need a new connection for each query in + order to see the changes from previous queries (eg. if we create a new table and + then we want to list tables from INFORMATION_SCHEMA, we need to create a new + DuckDB connection). + + Returns: + duckdb.DuckDBPyConnection: database connection. + """ + return duckdb.connect( + database=self.credentials.get("database"), + read_only=self.credentials.get("read_only", False), + ) + + @property + def tables(self) -> list[str]: + """Show the list of fully qualified table names. + + Returns: + list[str]: The list of tables in the format '{SCHEMA}.{TABLE}'. + """ + tables_meta: list[tuple] = self.run_query( + "SELECT * FROM information_schema.tables" + ) + return [table_meta[1] + "." + table_meta[2] for table_meta in tables_meta] + + @property + def schemas(self) -> list[str]: + """Show the list of schemas. + + Returns: + list[str]: The list of schemas. + """ + self.logger.warning( + "DuckDB does not expose a way to list schemas. `DuckDB.schemas` only contains schemas with tables." + ) + tables_meta: list[tuple] = self.run_query( + "SELECT * FROM information_schema.tables" + ) + return [table_meta[1] for table_meta in tables_meta] + + def to_df(self, query: str, if_empty: str | None = None) -> pd.DataFrame: + """Run DuckDB query and save output to a pandas DataFrame. + + Args: + query (str): The query to execute. If query doesn't start with SELECT or + WITH, empty DataFrame will be returned. + if_empty (str, optional): What to do if output DataFrame is empty. Defaults + to None. 
+ + Returns: + pd.DataFrame: DataFrame with query output + """ + if query.upper().startswith("SELECT") or query.upper().startswith("WITH"): + df = self.run_query(query, fetch_type="dataframe") + if df.empty: + self._handle_if_empty(if_empty=if_empty) + else: + df = pd.DataFrame() + return df + + def run_query( + self, query: str, fetch_type: Literal["record", "dataframe"] = "record" + ) -> list[Record] | bool: + """Run a query on DuckDB. + + Args: + query (str): The query to execute. + fetch_type (Literal[, optional): How to return the data: either in the + default record format or as a pandas DataFrame. Defaults to "record". + + Returns: + Union[list[Record], bool]: Either the result set of a query or, + in case of DDL/DML queries, a boolean describing whether + the query was executed successfully. + """ + allowed_fetch_type_values = ["record", "dataframe"] + if fetch_type not in allowed_fetch_type_values: + msg = f"Only the values {allowed_fetch_type_values} are allowed for 'fetch_type'" + raise ValueError(msg) + cursor = self.con.cursor() + cursor.execute(query) + + # Cleanup the query. + query_clean = query.upper().strip() + # Find comments. + regex = r"^\s*[--;].*" + lines = query_clean.splitlines() + final_query = "" + + for line_raw in lines: + line = line_raw.strip() + match_object = re.match(regex, line) + if not match_object: + final_query += " " + line + final_query = final_query.strip() + query_keywords = ["SELECT", "SHOW", "PRAGMA", "WITH"] + if any(final_query.startswith(word) for word in query_keywords): + result = cursor.fetchall() if fetch_type == "record" else cursor.fetchdf() + else: + result = True + + cursor.close() + return result + + def _handle_if_empty(self, if_empty: str = "warn") -> None: + if if_empty == "warn": + self.logger.warning("The query produced no data.") + elif if_empty == "skip": + msg = "The query produced no data. Skipping..." + raise SKIP(msg) + elif if_empty == "fail": + msg = "The query produced no data." + raise ValueError(msg) + + def create_table_from_parquet( + self, + table: str, + path: str, + schema: str | None = None, + if_exists: Literal["fail", "replace", "append", "skip", "delete"] = "fail", + ) -> bool: + """Create a DuckDB table with a CTAS from Parquet file(s). + + Args: + table (str): Destination table. + path (str): The path to the source Parquet file(s). Glob expressions are + also allowed here (eg. `my_folder/*.parquet`). + schema (str, optional): Destination schema. Defaults to None. + if_exists (Literal[, optional): What to do if the table already exists. + The 'delete' option deletes data and then inserts new one. Defaults to + "fail". + + Raises: + ValueError: If the table exists and `if_exists` is set to `fail`. + + Returns: + None: Does not return anything. + """ + schema = schema or DuckDB.DEFAULT_SCHEMA + fqn = schema + "." 
+ table + exists = self._check_if_table_exists(schema=schema, table=table) + + if exists: + if if_exists == "replace": + self.run_query(f"DROP TABLE {fqn}") + elif if_exists == "append": + self.logger.info(f"Appending to table {fqn}...") + create_table_query = f"COPY {fqn} FROM '{path}' (FORMAT 'parquet')" + self.run_query(create_table_query) + self.logger.info(f"Successfully appended data to table '{fqn}'.") + return True + elif if_exists == "delete": + self.run_query(f"DELETE FROM {fqn}") # noqa: S608 + self.logger.info(f"Successfully deleted data from table '{fqn}'.") + self.run_query( + f"INSERT INTO {fqn} SELECT * FROM read_parquet('{path}')" # noqa: S608 + ) + self.logger.info(f"Successfully inserted data into table '{fqn}'.") + return True + elif if_exists == "fail": + msg = "The table already exists and 'if_exists' is set to 'fail'." + raise ValueError(msg) + elif if_exists == "skip": + return False + self.run_query(f"CREATE SCHEMA IF NOT EXISTS {schema}") + self.logger.info(f"Creating table {fqn}...") + create_table_query = f"CREATE TABLE {fqn} AS SELECT * FROM '{path}';" # noqa: S608 + self.run_query(create_table_query) + self.logger.info(f"Table {fqn} has been created successfully.") + return True + + def drop_table(self, table: str, schema: str | None = None) -> bool: + """Drop a table. + + A thin wrapper around DuckDB.run_query(), with additional logs. + + Args: + table (str): The table to be dropped. + schema (str, optional): The schema where the table is located. Defaults to + None. + + Returns: + bool: Whether the table was dropped. + """ + schema = schema or DuckDB.DEFAULT_SCHEMA + fqn = schema + "." + table + + self.logger.info(f"Dropping table {fqn}...") + dropped = self.run_query(f"DROP TABLE IF EXISTS {fqn}") + if dropped: + self.logger.info(f"Table {fqn} has been dropped successfully.") + else: + self.logger.info(f"Table {fqn} could not be dropped.") + return dropped + + def _check_if_table_exists(self, table: str, schema: str | None = None) -> bool: + schema = schema or DuckDB.DEFAULT_SCHEMA + fqn = schema + "." 
+ table + return fqn in self.tables + + def _check_if_schema_exists(self, schema: str) -> bool: + if schema == DuckDB.DEFAULT_SCHEMA: + return True + fqns = self.tables + return any(fqn.split(".")[0] == schema for fqn in fqns) diff --git a/src/viadot/sources/exchange_rates.py b/src/viadot/sources/exchange_rates.py index 001759cb6..090c79b32 100644 --- a/src/viadot/sources/exchange_rates.py +++ b/src/viadot/sources/exchange_rates.py @@ -1,6 +1,8 @@ -import json +"""Exchange Rates API connector.""" + from datetime import datetime -from typing import Any, Dict, List, Literal +import json +from typing import Any, Literal import pandas as pd import requests @@ -10,6 +12,7 @@ from viadot.sources.base import Source from viadot.utils import add_viadot_metadata_columns, cleanup_df, validate + Currency = Literal[ "USD", "EUR", "GBP", "CHF", "PLN", "DKK", "COP", "CZK", "SEK", "NOK", "ISK" ] @@ -23,50 +26,58 @@ def __init__( currency: Currency = "USD", start_date: str = datetime.today().strftime("%Y-%m-%d"), end_date: str = datetime.today().strftime("%Y-%m-%d"), - symbols=[ - "USD", - "EUR", - "GBP", - "CHF", - "PLN", - "DKK", - "COP", - "CZK", - "SEK", - "NOK", - "ISK", - ], - credentials: Dict[str, Any] = None, - config_key: str = None, + symbols: list[str] | None = None, + credentials: dict[str, Any] | None = None, + config_key: str | None = None, *args, **kwargs, ): - """ - Class for pulling data from https://api.apilayer.com/exchangerates_data/timeseries + """Download data from https://api.apilayer.com/exchangerates_data/timeseries. Args: - currency (Currency, optional): Base currency to which prices of searched currencies are related. Defaults to "USD". - start_date (str, optional): Initial date for data search. Data range is start_date -> - end_date, supported format 'yyyy-mm-dd'. Defaults to datetime.today().strftime("%Y-%m-%d"). - end_date (str, optional): See above. Defaults to datetime.today().strftime("%Y-%m-%d"). - symbols (list, optional): List of currencies for which exchange rates from base currency will be fetch. - Defaults to [ "USD", "EUR", "GBP", "CHF", "PLN", "DKK", "COP", "CZK", "SEK", "NOK", "ISK" ], Only ISO codes. - credentials (Dict[str, Any], optional): 'api_key'. Defaults to None. - config_key (str, optional): The key in the viadot config holding relevant credentials. + currency (Currency, optional): Base currency to which prices of searched + currencies are related. Defaults to "USD". + start_date (str, optional): Initial date for data search. Data range is + start_date -> end_date, supported format 'yyyy-mm-dd'. Defaults to + datetime.today().strftime("%Y-%m-%d"). + end_date (str, optional): See above. Defaults to + datetime.today().strftime("%Y-%m-%d"). + symbols (list, optional): List of ISO codes for which exchange rates from + base currency will be fetched. Defaults to ["USD", "EUR", "GBP", "CHF", + "PLN", "DKK", "COP", "CZK", "SEK", "NOK", "ISK" ]. + credentials (Dict[str, Any], optional): The credentials to use. Defaults to + None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. """ - credentials = credentials or get_source_credentials(config_key) if credentials is None: - raise CredentialError("Please specify the credentials.") + msg = "Please specify the credentials." 
+ raise CredentialError(msg) super().__init__(*args, credentials=credentials, **kwargs) + if not symbols: + symbols = [ + "USD", + "EUR", + "GBP", + "CHF", + "PLN", + "DKK", + "COP", + "CZK", + "SEK", + "NOK", + "ISK", + ] + self.currency = currency self.start_date = start_date self.end_date = end_date self.symbols = symbols self._validate_symbols(self.symbols, self.currency) - def _validate_symbols(self, symbols, currency): + def _validate_symbols(self, symbols: list[str], currency: str): cur_list = [ "USD", "EUR", @@ -82,17 +93,20 @@ def _validate_symbols(self, symbols, currency): ] if currency not in cur_list: - raise ValueError( - f"The specified currency does not exist or is unsupported: {currency}" - ) + msg = f"The specified currency does not exist or is unsupported: {currency}" + raise ValueError(msg) for i in symbols: if i not in cur_list: - raise ValueError( - f"The specified currency list item does not exist or is not supported: {i}" - ) + msg = f"The specified currency list item does not exist or is not supported: {i}" + raise ValueError(msg) + + def get_data(self) -> dict[str, Any]: + """Download data from the API. - def get_data(self) -> Dict[str, Any]: + Returns: + dict[str, Any]: The data from the API. + """ headers = {"apikey": self.credentials["api_key"]} payload = { "start_date": self.start_date, @@ -100,16 +114,18 @@ def get_data(self) -> Dict[str, Any]: "base": self.currency, "symbols": ",".join(self.symbols), } - try: - response = requests.request( - "GET", ExchangeRates.URL, headers=headers, params=payload - ) - except ConnectionError as e: - raise e + response = requests.request( + "GET", ExchangeRates.URL, headers=headers, params=payload, timeout=(3, 10) + ) return json.loads(response.text) - def to_records(self) -> List[tuple]: + def to_records(self) -> list[tuple]: + """Download data and convert it to a list of records. + + Returns: + list[tuple]: The records of the data. + """ data = self.get_data() records = [] @@ -120,19 +136,27 @@ def to_records(self) -> List[tuple]: for i in data["rates"][j]: records.append(data["rates"][j][i]) - records = [x for x in zip(*[iter(records)] * (2 + len(self.symbols)))] + return list(zip(*[iter(records)] * (2 + len(self.symbols)), strict=False)) - return records + def get_columns(self) -> list[str]: + """Return the columns of the data. - def get_columns(self) -> List[str]: - columns = ["Date", "Base"] + self.symbols + Returns: + list[str]: The columns of the data. + """ + return ["Date", "Base", *self.symbols] - return columns + def to_json(self) -> dict[str, Any]: + """Download data and convert it to a JSON. - def to_json(self) -> Dict[str, Any]: + Returns: + dict[str, Any]: The JSON with the data. + """ records = self.to_records() columns = self.get_columns() - records = [dict(zip(columns, records[i])) for i in range(len(records))] + records = [ + dict(zip(columns, records[i], strict=False)) for i in range(len(records)) + ] json = {} json["currencies"] = records @@ -141,8 +165,16 @@ def to_json(self) -> Dict[str, Any]: @add_viadot_metadata_columns def to_df( self, - tests: dict = None, + tests: dict | None = None, ) -> pd.DataFrame: + """Download data and convert it to a pandas DataFrame. + + Args: + tests (dict | None, optional): The tests specification. Defaults to None. + + Returns: + pd.DataFrame: The pandas DataFrame with the data. 
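A minimal usage sketch; the credentials dict and date range are placeholders, and a valid `api_key` is required:

```python
# Hypothetical sketch: fetch a week of EUR-based rates for a few symbols.
rates = ExchangeRates(
    currency="EUR",
    start_date="2024-01-01",
    end_date="2024-01-07",
    symbols=["USD", "GBP", "PLN"],
    credentials={"api_key": "<your_api_key>"},
)
df = rates.to_df()
```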
+ """ json = self.to_json() df = pd.json_normalize(json["currencies"]) df_clean = cleanup_df(df) diff --git a/src/viadot/sources/genesys.py b/src/viadot/sources/genesys.py index 7e54a2c0a..7c1eb6789 100644 --- a/src/viadot/sources/genesys.py +++ b/src/viadot/sources/genesys.py @@ -1,118 +1,146 @@ +"""Genesys Cloud API connector.""" + import asyncio import base64 -import json -import os -import warnings from io import StringIO -from typing import Any, Dict, List, Literal, Optional +import json +import time +from typing import Any import aiohttp -import pandas as pd from aiolimiter import AsyncLimiter +import numpy as np +import pandas as pd +from pydantic import BaseModel from viadot.config import get_source_credentials from viadot.exceptions import APIError, CredentialError -from viadot.signals import SKIP from viadot.sources.base import Source -from viadot.utils import handle_api_response, validate +from viadot.utils import add_viadot_metadata_columns, handle_api_response, validate + -warnings.simplefilter("ignore") +class GenesysCredentials(BaseModel): + """Validate Genesys credentials. + + Two key values are held in the Genesys connector: + - client_id: The unique ID for the organization. + - client_secret: Secret string of characters to have access to divisions. + + Args: + BaseModel (pydantic.main.ModelMetaclass): A base class for creating Pydantic + models. + """ + + client_id: str + client_secret: str class Genesys(Source): + ENVIRONMENTS = ( + "cac1.pure.cloud", + "sae1.pure.cloud", + "mypurecloud.com", + "usw2.pure.cloud", + "aps1.pure.cloud", + "apne3.pure.cloud", + "apne2.pure.cloud", + "mypurecloud.com.au", + "mypurecloud.jp", + "mypurecloud.ie", + "mypurecloud.de", + "euw2.pure.cloud", + "euc2.pure.cloud", + "mec1.pure.cloud", + ) + def __init__( self, - view_type: str = "queue_performance_detail_view", - ids_mapping: Dict[str, Any] = None, - start_date: str = None, - end_date: str = None, - report_name: str = None, - file_extension: Literal["xls", "xlsx", "csv"] = "csv", - config_key: str = None, - credentials: Dict[str, Any] = None, - environment: str = None, - report_url: str = None, - schedule_id: str = None, - report_columns: List[str] = None, - *args: List[Any], - **kwargs: Dict[str, Any], + *args, + credentials: GenesysCredentials | None = None, + config_key: str = "genesys", + verbose: bool = False, + environment: str = "mypurecloud.de", + **kwargs, ): - """ - Genesys connector which allows for reports scheduling, listing and downloading into DataFrame or specified format output. + """Genesys Cloud API connector. + + Provides functionalities for connecting to Genesys Cloud API and downloading + generated reports. It includes the following features: + + - Generate reports inside Genesys. + - Download the reports previously created. + - Direct connection to Genesys Cloud API, via GET method, to retrieve the data + without any report creation. + - Remove any report previously created. Args: - view_type (str, optional): The type of view export job to be created. Defaults to "queue_performance_detail_view". - ids_mapping (str, optional): Dictionary mapping for converting IDs to strings. Defaults to None. - start_date (str, optional): Start date of the report. Defaults to None. - end_date (str, optional): End date of the report. Defaults to None. - report_name (str, optional): Name of the report. Defaults to None. - file_extension (Literal[xls, xlsx, csv], optional): File extensions for downloaded files. Defaults to "csv". 
- credentials (Dict[str, Any], optional): Credentials to connect with Genesys API containing CLIENT_ID, - environment (str, optional): Adress of host server. Defaults to None than will be used enviroment - from credentials. - report_url (str, optional): The url of report generated in json response. Defaults to None. - schedule_id (str, optional): The ID of report. Defaults to None. - report_columns (List[str], optional): List of exisiting column in report. Defaults to None. + credentials (Optional[GenesysCredentials], optional): Genesys credentials. + Defaults to None + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to "genesys". + verbose (bool, optional): Increase the details of the logs printed on the + screen. Defaults to False. + environment (str, optional): the domain that appears for Genesys Cloud + Environment based on the location of your Genesys Cloud organization. + Defaults to "mypurecloud.de". + + Examples: + genesys = Genesys( + credentials=credentials, + config_key=config_key, + verbose=verbose, + environment=environment, + ) + genesys.api_connection( + endpoint=endpoint, + queues_ids=queues_ids, + view_type=view_type, + view_type_time_sleep=view_type_time_sleep, + post_data_list=post_data_list, + normalization_sep=normalization_sep, + ) + data_frame = genesys.to_df( + drop_duplicates=drop_duplicates, + validate_df_dict=validate_df_dict, + ) Raises: - CredentialError: If credentials are not provided in local_config or directly as a parameter. + CredentialError: If credentials are not provided in local_config or directly + as a parameter. + APIError: When the environment variable is not among the available. """ - - credentials = credentials or get_source_credentials(config_key) or {} - + credentials = credentials or get_source_credentials(config_key) or None if credentials is None: - raise CredentialError("Please specify the credentials.") - + msg = "Missing credentials." + raise CredentialError(msg) self.credentials = credentials - super().__init__(*args, credentials=self.credentials, **kwargs) - - self.view_type = view_type - self.schedule_id = schedule_id - self.report_name = report_name - self.environment = environment - self.report_url = report_url - self.report_columns = report_columns - - self.start_date = start_date - self.end_date = end_date - self.file_extension = file_extension - self.ids_mapping = ids_mapping - self.count = iter(range(99999)) - - if self.schedule_id is None: - self.schedule_id = self.credentials.get("SCHEDULE_ID", None) + validated_creds = dict(GenesysCredentials(**credentials)) + super().__init__(*args, credentials=validated_creds, **kwargs) - if self.environment is None: - self.environment = self.credentials.get("ENVIRONMENT", None) + self.verbose = verbose + self.data_returned = {} + self.new_report = "{}" - if self.ids_mapping is None: - self.ids_mapping = self.credentials.get("IDS_MAPPING", None) - - if isinstance(self.ids_mapping, dict) and self.ids_mapping is not None: - self.logger.info("IDS_MAPPING loaded from local credential.") - else: - self.logger.warning( - "IDS_MAPPING is not provided in you credentials or is not a dictionary." - ) - - self.report_data = [] + if environment in self.ENVIRONMENTS: + self.environment = environment + else: + raise APIError( + f"Environment '{environment}' not available" + + " in Genesys Cloud Environments." + ) @property - def authorization_token(self, verbose: bool = False) -> Dict[str, Any]: - """ - Get authorization token with request headers. 
- - Args: - verbose (bool, optional): Switch on/off for logging messages. Defaults to False. + def headers(self) -> dict[str, Any]: + """Get request headers. Returns: Dict[str, Any]: Request headers with token. """ - CLIENT_ID = self.credentials.get("CLIENT_ID", "") - CLIENT_SECRET = self.credentials.get("CLIENT_SECRET", "") + client_id = self.credentials.get("client_id", "") + client_secret = self.credentials.get("client_secret", "") authorization = base64.b64encode( - bytes(CLIENT_ID + ":" + CLIENT_SECRET, "ISO-8859-1") + bytes(client_id + ":" + client_secret, "ISO-8859-1") ).decode("ascii") request_headers = { "Authorization": f"Basic {authorization}", @@ -126,432 +154,688 @@ def authorization_token(self, verbose: bool = False) -> Dict[str, Any]: method="POST", timeout=3600, ) - if verbose: - if response.status_code == 200: - self.logger.info("Temporary authorization token was generated.") - else: - self.logger.info( - f"Failure: { str(response.status_code) } - { response.reason }" - ) + + if response.ok: + self.logger.info("Temporary authorization token was generated.") + else: + self.logger.info( + f"Failure: { response.status_code !s} - { response.reason }" + ) response_json = response.json() - request_headers = { - "Authorization": f"{ response_json['token_type'] } { response_json['access_token']}", + + return { + "Authorization": f"{ response_json['token_type'] }" + + f" { response_json['access_token']}", "Content-Type": "application/json", } - return request_headers - - def genesys_generate_exports( - self, post_data_list: List[str], end_point: str = "reporting/exports" - ) -> Optional[dict]: - """Function that make POST request method to generate export reports. + def _api_call( + self, + endpoint: str, + post_data_list: list[str], + method: str, + params: dict[str, Any] | None = None, + sleep_time: float = 0.5, + ) -> dict[str, Any]: + """General method to connect to Genesys Cloud API and generate the response. Args: - post_data_list (List[str], optional): List of string templates to generate json body. Defaults to None. - end_point (str, optional): Final end point for Genesys connection. Defaults to "reporting/exports". + endpoint (str): Final end point to the API. + post_data_list (List[str]): List of string templates to generate json body. + method (str): Type of connection to the API. Defaults to "POST". + params (Optional[Dict[str, Any]], optional): Parameters to be passed into + the POST call. Defaults to None. + sleep_time (int, optional): The time, in seconds, to sleep the call to the + API. Defaults to 0.5. + + Raises: + RuntimeError: There is no current event loop in asyncio thread. Returns: - Optional[dict]: Dict when the "conversations" endpoint is called, otherwise returns None. + Dict[str, Any]: Genesys Cloud API response. When the endpoint requires to + create a report within Genesys Cloud, the response is just useless + information. The useful data must be downloaded from apps.{environment} + through another requests. 
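The implementation below throttles requests with `aiolimiter`; a standalone sketch of that pattern is shown here, with `asyncio.sleep` standing in for the real `aiohttp` request:

```python
# Standalone sketch of the rate-limiting pattern; the sleep replaces the actual HTTP call.
import asyncio

from aiolimiter import AsyncLimiter

limiter = AsyncLimiter(2, 15)  # at most 2 calls per 15-second window


async def call_api(payload: dict) -> None:
    async with limiter:
        await asyncio.sleep(0.1)  # placeholder for the aiohttp POST/GET


async def main() -> None:
    await asyncio.gather(*(call_api({"page": i}) for i in range(4)))


asyncio.run(main())
```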
""" - limiter = AsyncLimiter(2, 15) semaphore = asyncio.Semaphore(value=1) + url = f"https://api.{self.environment}/api/v2/{endpoint}" async def generate_post(): - cnt = 0 - for data_to_post in post_data_list: - if cnt < 10: - payload = json.dumps(data_to_post) - async with aiohttp.ClientSession() as session: - await semaphore.acquire() - async with limiter: + payload = json.dumps(data_to_post) + + async with aiohttp.ClientSession() as session: + await semaphore.acquire() + + async with limiter: + if method == "POST": async with session.post( - f"https://api.{self.environment}/api/v2/analytics/{end_point}", - headers=self.authorization_token, + url, + headers=self.headers, data=payload, ) as resp: - global new_report - new_report = await resp.read() - self.logger.info( - f"Generated report export --- \n {payload}." - ) + # global new_report + self.new_report = await resp.read() + message = "Generated report export ---" + if self.verbose: + message += f"\n {payload}." + self.logger.info(message) + semaphore.release() - cnt += 1 - else: - await asyncio.sleep(3) - cnt = 0 + + elif method == "GET": + async with session.get( + url, + headers=self.headers, + params=params, + ) as resp: + self.new_report = await resp.read() + message = "Connecting to Genesys Cloud" + if self.verbose: + message += f": {params}." + self.logger.info(message) + + semaphore.release() + + await asyncio.sleep(sleep_time) try: loop = asyncio.get_event_loop() - except RuntimeError as e: - if str(e).startswith("There is no current event loop in thread"): + except RuntimeError as err: + if str(err).startswith("There is no current event loop in thread"): loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) else: - raise e - + raise coroutine = generate_post() loop.run_until_complete(coroutine) - if end_point == "conversations/details/query": - return json.loads(new_report.decode("utf-8")) + return json.loads(self.new_report.decode("utf-8")) - def load_reporting_exports( - self, page_size: int = 100, verbose: bool = False - ) -> Dict[str, Any]: - """ - GET method for reporting export. + def _load_reporting_exports( + self, + page_size: int = 100, + ) -> dict[str, Any]: + """Consult the status of the reports created in Genesys Cloud. Args: - page_size (int, optional): The number of items on page to print. Defaults to 100. - verbose (bool, optional): Switch on/off for logging messages. Defaults to False. + page_size (int, optional): The number of items on page to print. + Defaults to 100. + verbose (bool, optional): Switch on/off for logging messages. + Defaults to False. + + Raises: + APIError: Failed to loaded the exports from Genesys Cloud. Returns: - Dict[str, Any]: schedule genesys report. + Dict[str, Any]: Schedule genesys report. """ - new_report = handle_api_response( - url=f"https://api.{self.environment}/api/v2/analytics/reporting/exports?pageSize={page_size}", - headers=self.authorization_token, + response = handle_api_response( + url=f"https://api.{self.environment}/api/v2/" + + f"analytics/reporting/exports?pageSize={page_size}", + headers=self.headers, method="GET", ) - if new_report.status_code == 200: - if verbose: - self.logger.info("Succesfully loaded all exports.") - return new_report.json() - else: - self.logger.error(f"Failed to loaded all exports. 
- {new_report.content}") - raise APIError("Failed to loaded all exports.") + response_ok = 200 + if response.status_code == response_ok: + return response.json() - def get_reporting_exports_data(self) -> None: - """ - Function that generate list of reports metadata for further processing steps. + self.logger.error(f"Failed to loaded all exports. - {response.content}") + msg = "Failed to loaded all exports." + raise APIError(msg) + + def _get_reporting_exports_url(self, entities: list[str]) -> tuple[list[str]]: + """Collect all reports created in Genesys Cloud. + + Args: + entities (List[str]): List of dictionaries with all the reports information + available in Genesys Cloud. Returns: - None + Tuple[List[str]]: A tuple with Lists of IDs and URLs. """ - request_json = self.load_reporting_exports() - - if request_json is not None: - entities = request_json.get("entities") - assert isinstance(entities, list) - if len(entities) != 0: - for entity in entities: - tmp = [ - entity.get("id"), - entity.get("downloadUrl"), - entity.get("filter").get("queueIds", [-1])[0], - entity.get("filter").get("mediaTypes", [-1])[0], - entity.get("viewType"), - entity.get("interval"), - entity.get("status"), - ] - self.report_data.append(tmp) - assert len(self.report_data) > 0 - self.logger.info("Generated list of reports entities.") - - def download_report( + ids = [] + urls = [] + status = [] + for entity in entities: + ids.append(entity.get("id")) + urls.append(entity.get("downloadUrl")) + # entity.get("filter").get("queueIds", [-1])[0], + # entity.get("filter").get("mediaTypes", [-1])[0], + # entity.get("viewType"), + # entity.get("interval"), + status.append(entity.get("status")) + + if "FAILED" in status: + self.logger.error("Some reports have not been successfully created.") + if "RUNNING" in status: + self.logger.warning( + "Some reports are still being created and can not be downloaded." + ) + if self.verbose: + message = "".join( + [f"\t{i} -> {j} \n" for i, j in zip(ids, status, strict=False)] + ) + self.logger.info(f"Report status:\n{message}") + + return ids, urls + + def _delete_report(self, report_id: str) -> None: + """Delete a particular report in Genesys Cloud. + + Args: + report_id (str): Id of the report to be deleted. + """ + delete_response = handle_api_response( + url=f"https://api.{self.environment}/api/v2/" + + f"analytics/reporting/exports/{report_id}", + headers=self.headers, + method="DELETE", + ) + # Ok-ish responses (includes eg. 204 No Content) + ok_response_limit = 300 + if delete_response.status_code < ok_response_limit: + self.logger.info( + f"Successfully deleted report '{report_id}' from Genesys API." + ) + else: + self.logger.error( + f"Failed to delete report '{report_id}' " + + f"from Genesys API. - {delete_response.content}" + ) + + def _download_report( self, report_url: str, - output_file_name: str = None, - file_extension: str = "csv", - path: str = "", - sep: str = "\t", drop_duplicates: bool = True, - ) -> None: - """ - Download report to excel file. + ) -> pd.DataFrame: + """Download report from Genesys Cloud. Args: report_url (str): url to report, fetched from json response. - output_file_name (str, optional): Output file name. Defaults to None. - file_extension (str, optional): Output file extension. Defaults to "xls". - path (str, optional): Path to the generated excel file. Defaults to empty string. - sep (str, optional): Separator in csv file. Defaults to "\t". - drop_duplicates (bool, optional): Decide if drop duplicates. Defaults to True. 
+ drop_duplicates (bool, optional): Decide if drop duplicates. + Defaults to True. Returns: - None + pd.DataFrame: Data in a pandas DataFrame. """ - response_file = handle_api_response( - url=f"{report_url}", headers=self.authorization_token - ) - if output_file_name is None: - final_file_name = f"Genesys_Queue_Metrics_Interval_Export.{file_extension}" + response = handle_api_response(url=f"{report_url}", headers=self.headers) + + # Ok-ish responses (includes eg. 204 No Content) + ok_response_limit = 300 + if response.status_code < ok_response_limit: + self.logger.info( + f"Successfully downloaded report from Genesys API ('{report_url}')." + ) + else: - final_file_name = f"{output_file_name}.{file_extension}" + msg = ( + "Failed to download report from" + + f" Genesys API ('{report_url}'). - {response.content}" + ) + self.logger.error(msg) - df = pd.read_csv(StringIO(response_file.content.decode("utf-8"))) + dataframe = pd.read_csv(StringIO(response.content.decode("utf-8"))) if drop_duplicates is True: - df.drop_duplicates(inplace=True, ignore_index=True) + dataframe.drop_duplicates(inplace=True, ignore_index=True) - df.to_csv(os.path.join(path, final_file_name), index=False, sep=sep) + return dataframe - def download_all_reporting_exports( - self, store_file_names: bool = True, path: str = "" - ) -> List[str]: - """ - Get information form data report and download all files. + def _merge_conversations(self, data_to_merge: list) -> pd.DataFrame: # noqa: C901, PLR0912 + """Merge all the conversations data into a single data frame. Args: - store_file_names (bool, optional): decide whether to store list of names. Defaults to True. - path (str, optional): Path to the generated excel file. Defaults to empty string. + data_to_merge (list): List with all the conversations in json format. + Example for all levels data to merge: + { + "conversations": [ + { + **** LEVEL 0 data **** + "participants": [ + { + **** LEVEL 1 data **** + "sessions": [ + { + "agentBullseyeRing": 1, + **** LEVEL 2 data **** + "mediaEndpointStats": [ + { + **** LEVEL 3 data **** + }, + ], + "metrics": [ + { + **** LEVEL 3 data **** + }, + ], + "segments": [ + { + **** LEVEL 3 data **** + }, + { + **** LEVEL 3 data **** + }, + ], + } + ], + }, + { + "participantId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + **** LEVEL 1 data **** + "sessions": [ + { + **** LEVEL 2 data **** + "mediaEndpointStats": [ + { + **** LEVEL 3 data **** + } + ], + "flow": { + **** LEVEL 2 data **** + }, + "metrics": [ + { + **** LEVEL 3 data **** + }, + ], + "segments": [ + { + **** LEVEL 3 data **** + }, + ], + } + ], + }, + ], + } + ], + "totalHits": 100, + } Returns: - List[str]: All file names of downloaded files. + DataFrame: A single data frame with all the content. 
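A toy sketch of the per-level `pd.json_normalize` flattening used below; the payload is hypothetical and far smaller than a real response:

```python
# Toy sketch: flatten one nesting level while keeping the parent id as metadata.
import pandas as pd

payload = [
    {
        "conversationId": "c-1",
        "participants": [
            {"participantId": "p-1", "sessions": [{"sessionId": "s-1"}]},
        ],
    }
]

level_1 = pd.json_normalize(payload, record_path=["participants"], meta=["conversationId"])
print(level_1.columns.tolist())  # ['participantId', 'sessions', 'conversationId']
```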
""" - file_name_list = [] - temp_ids_mapping = self.ids_mapping - if temp_ids_mapping is None: - self.logger.warning("IDS_MAPPING is not provided in you credentials.") - else: - self.logger.info("IDS_MAPPING loaded from local credential.") + # LEVEL 0 + df0 = pd.json_normalize(data_to_merge) + df0.drop(["participants"], axis=1, inplace=True) + + # LEVEL 1 + df1 = pd.json_normalize( + data_to_merge, + record_path=["participants"], + meta=["conversationId"], + ) + df1.drop(["sessions"], axis=1, inplace=True) + + # LEVEL 2 + df2 = pd.json_normalize( + data_to_merge, + record_path=["participants", "sessions"], + meta=[ + ["participants", "externalContactId"], + ["participants", "participantId"], + ], + errors="ignore", + sep="_", + ) + # Columns that will be the reference for the next LEVEL + df2.rename( + columns={ + "participants_externalContactId": "externalContactId", + "participants_participantId": "participantId", + }, + inplace=True, + ) + for key in ["metrics", "segments", "mediaEndpointStats"]: + try: + df2.drop([key], axis=1, inplace=True) + except KeyError as err: + self.logger.info(f"Key {err} not appearing in the response.") + + # LEVEL 3 + conversations_df = {} + for i, conversation in enumerate(data_to_merge): + # Not all "sessions" have the same data, and that creates + # problems of standardization + # Empty data will be added to columns where there is not to avoid + # future errors. + for j, entry_0 in enumerate(conversation["participants"]): + for key in list(entry_0.keys()): + if key == "sessions": + for k, entry_1 in enumerate(entry_0[key]): + if "metrics" not in list(entry_1.keys()): + conversation["participants"][j][key][k]["metrics"] = [] + if "segments" not in list(entry_1.keys()): + conversation["participants"][j][key][k]["segments"] = [] + if "mediaEndpointStats" not in list(entry_1.keys()): + conversation["participants"][j][key][k][ + "mediaEndpointStats" + ] = [] + + # LEVEL 3 metrics + df3_1 = pd.json_normalize( + conversation, + record_path=["participants", "sessions", "metrics"], + meta=[ + ["participants", "sessions", "sessionId"], + ], + errors="ignore", + record_prefix="metrics_", + sep="_", + ) + df3_1.rename( + columns={"participants_sessions_sessionId": "sessionId"}, inplace=True + ) - for single_report in self.report_data: - self.logger.info(single_report) - if single_report[-1] == "RUNNING": - self.logger.warning( - "The request is still in progress and will be deleted, consider add more seconds in `view_type_time_sleep` parameter." - ) - continue - elif single_report[-1] == "FAILED": - self.logger.warning( - "This message 'FAILED_GETTING_DATA_FROM_SERVICE' raised during script execution." - ) - continue - elif self.start_date not in single_report[5]: - self.logger.warning( - f"The report with ID {single_report[0]} doesn't match with the interval date that you have already defined. \ - The report won't be downloaded but will be deleted." 
- ) - continue - - if single_report[4].lower() == "queue_performance_detail_view": - file_name = ( - temp_ids_mapping.get(single_report[2]) + "_" + single_report[3] - ).upper() - elif single_report[4].lower() in [ - "agent_performance_summary_view", - "agent_status_summary_view", - ]: - date = self.start_date.replace("-", "") - file_name = self.view_type.upper() + "_" + f"{date}" - elif single_report[4].lower() in [ - "agent_status_detail_view", - ]: - date = self.start_date.replace("-", "") - file_name = self.view_type.upper() + f"_{next(self.count)}_" + f"{date}" - else: - raise SKIP( - message=f"View type {self.view_type} not defined in viadot, yet..." - ) + # LEVEL 3 segments + df3_2 = pd.json_normalize( + conversation, + record_path=["participants", "sessions", "segments"], + meta=[ + ["participants", "sessions", "sessionId"], + ], + errors="ignore", + record_prefix="segments_", + sep="_", + ) + df3_2.rename( + columns={"participants_sessions_sessionId": "sessionId"}, inplace=True + ) - self.download_report( - report_url=single_report[1], - path=path, - output_file_name=file_name, - file_extension=self.file_extension, + # LEVEL 3 mediaEndpointStats + df3_3 = pd.json_normalize( + conversation, + record_path=["participants", "sessions", "mediaEndpointStats"], + meta=[ + ["participants", "sessions", "sessionId"], + ], + errors="ignore", + record_prefix="mediaEndpointStats_", + sep="_", + ) + df3_3.rename( + columns={"participants_sessions_sessionId": "sessionId"}, inplace=True ) - if store_file_names is True: - file_name_list.append(file_name + "." + self.file_extension) - self.logger.info("Al reports were successfully dowonload.") + # merging all LEVELs 3 from the same conversation + dff3_tmp = pd.concat([df3_1, df3_2]) + dff3 = pd.concat([dff3_tmp, df3_3]) - if store_file_names is True: - self.logger.info("Successfully genetared file names list.") - return file_name_list + conversations_df.update({i: dff3}) - def generate_reporting_export( - self, data_to_post: Dict[str, Any], verbose: bool = False - ) -> int: - """ - POST method for reporting export. + # NERGING ALL LEVELS + # LEVELS 3 + for i_3, key in enumerate(list(conversations_df.keys())): + if i_3 == 0: + dff3_f = conversations_df[key] + else: + dff3_f = pd.concat([dff3_f, conversations_df[key]]) - Args: - data_to_post (Dict[str, Any]): Json format of POST body. - verbose (bool, optional): Decide if enable logging. Defaults to True. + # LEVEL 3 with LEVEL 2 + dff2 = pd.merge(dff3_f, df2, how="outer", on=["sessionId"]) - Returns: - int: Status code. - """ - payload = json.dumps(data_to_post) - new_report = handle_api_response( - url=f"https://api.{self.environment}/api/v2/analytics/reporting/exports", - headers=self.authorization_token, - method="POST", - data=payload, + # LEVEL 2 with LEVEL 1 + dff1 = pd.merge( + df1, dff2, how="outer", on=["externalContactId", "participantId"] ) - if verbose: - if new_report.status_code == 200: - self.logger.info("Succesfully generated new export.") - else: - self.logger.error( - f"Failed to generated new export. - {new_report.content}" - ) - raise APIError("Failed to generated new export.") - return new_report.status_code - def delete_reporting_exports(self, report_id: str) -> int: - """DELETE method for deleting particular reporting exports. + # LEVEL 1 with LEVEL 0 + return pd.merge(df0, dff1, how="outer", on=["conversationId"]) + + # This is way too complicated for what it's doing... + def api_connection( # noqa: PLR0912, PLR0915, C901. 
+ self, + endpoint: str | None = None, + queues_ids: list[str] | None = None, + view_type: str | None = None, + view_type_time_sleep: int = 10, + post_data_list: list[dict[str, Any]] | None = None, + normalization_sep: str = ".", + ) -> None: + """General method to connect to Genesys Cloud API and generate the response. Args: - report_id (str): Defined at the end of report url. + endpoint (Optional[str], optional): Final end point to the API. + Defaults to None. + + Custom endpoints have specific key words, and parameters: + Example: + - "routing/queues/{id}/members": "routing_queues_members" + - members_ids = ["xxxxxxxxx", "xxxxxxxxx", ...] + queues_ids (Optional[List[str]], optional): List of queues ids to consult + the members. Defaults to None. + view_type (Optional[str], optional): The type of view export job to be + created. Defaults to None. + view_type_time_sleep (int, optional): Waiting time to retrieve data from + Genesys Cloud API. Defaults to 10. + post_data_list (Optional[List[Dict[str, Any]]], optional): List of string + templates to generate json body in POST calls to the API. + Defaults to None. + normalization_sep (str, optional): Nested records will generate names + separated by sep. Defaults to ".". - Returns: - int: Status code. + Raises: + APIError: Some or No reports were not created. + APIError: At different endpoints: + - 'analytics/conversations/details/query': only one body must be used. + - 'routing_queues_members': extra parameter `queues_ids` must be + included. """ - delete_method = handle_api_response( - url=f"https://api.{self.environment}/api/v2/analytics/reporting/exports/{report_id}", - headers=self.authorization_token, - method="DELETE", + self.logger.info( + f"Connecting to the Genesys Cloud using the endpoint: {endpoint}" ) - if delete_method.status_code < 300: - self.logger.info("Successfully deleted report from Genesys API.") - else: - self.logger.error( - f"Failed to deleted report from Genesys API. - {delete_method.content}" + if endpoint == "analytics/reporting/exports": + self._api_call( + endpoint=endpoint, + post_data_list=post_data_list, + method="POST", ) - raise APIError("Failed to deleted report from Genesys API.") - return delete_method.status_code + msg = ( + f"Waiting {view_type_time_sleep} seconds for" + + " caching data from Genesys Cloud API." + ) + self.logger.info(msg) + time.sleep(view_type_time_sleep) - def delete_all_reporting_exports(self) -> None: - """ - Function that deletes all reporting from self.reporting_data list. + request_json = self._load_reporting_exports() + entities = request_json["entities"] - Returns: - None - """ - for report in self.report_data: - status_code = self.delete_reporting_exports(report_id=report[0]) - assert status_code < 300 + if isinstance(entities, list) and len(entities) == len(post_data_list): + ids, urls = self._get_reporting_exports_url(entities) + else: + APIError( + "There are no reports to be downloaded." + f"May be {view_type_time_sleep} should be increased." 
+ ) - self.logger.info("Successfully removed all reports.") + # download and delete reports created + count = 0 + raise_api_error = False + for qid, url in zip(ids, urls, strict=False): + if url is not None: + df_downloaded = self._download_report(report_url=url) + + time.sleep(1.0) + # remove resume rows + if view_type in ["queue_performance_detail_view"]: + criteria = ( + df_downloaded["Queue Id"] + .apply(lambda x: str(x).split(";")) + .apply(lambda x: not len(x) > 1) + ) + df_downloaded = df_downloaded[criteria] + + self.data_returned.update({count: df_downloaded}) + else: + self.logger.error( + f"Report id {qid} didn't have time to be created. " + + "Consider increasing the `view_type_time_sleep` parameter " + + f">> {view_type_time_sleep} seconds to allow Genesys Cloud " + + "to conclude the report creation." + ) + raise_api_error = True + + self._delete_report(qid) + + count += 1 # noqa: SIM113 + + if raise_api_error: + msg = "Some reports creation failed." + raise APIError(msg) + + elif endpoint == "analytics/conversations/details/query": + if len(post_data_list) > 1: + msg = "Not available more than one body for this end-point." + raise APIError(msg) + + stop_loop = False + page_counter = post_data_list[0]["paging"]["pageNumber"] + self.logger.info( + "Restructuring the response in order to be able to insert it into a " + + "data frame.\n\tThis task could take a few minutes.\n" + ) + while not stop_loop: + report = self._api_call( + endpoint=endpoint, + post_data_list=post_data_list, + method="POST", + ) - def get_analitics_url_report(self) -> str: - """ - Fetching analytics report url from json response. + merged_data_frame = self._merge_conversations(report["conversations"]) + self.data_returned.update( + { + int(post_data_list[0]["paging"]["pageNumber"]) + - 1: merged_data_frame + } + ) - Returns: - str: Url for analytics report - """ - response = handle_api_response( - url=f"https://api.{self.environment}/api/v2/analytics/reporting/schedules/{self.schedule_id}", - headers=self.authorization_token, - ) - try: - response_json = response.json() - report_url = response_json.get("lastRun", None).get("reportUrl", None) - self.logger.info("Successfully downloaded report from genesys api") - return report_url - except AttributeError as e: - self.logger.error( - "Output data error: " + str(type(e).__name__) + ": " + str(e) - ) + if page_counter == 1: + max_calls = int(np.ceil(report["totalHits"] / 100)) + if page_counter == max_calls: + stop_loop = True - def get_all_schedules_job(self) -> Dict[str, Any]: - """ - Fetching analytics report url from json response. + post_data_list[0]["paging"]["pageNumber"] += 1 + page_counter += 1 - Returns: - Dict[str, Any]: Json body with all schedules jobs. 
- """ - response = handle_api_response( - url=f"https://api.{self.environment}/api/v2/analytics/reporting/schedules", - headers=self.authorization_token, - ) - try: - response_json = response.json() - self.logger.info("Successfully downloaded schedules jobs.") - return response_json - except AttributeError as e: - self.logger.error( - "Output data error: " + str(type(e).__name__) + ": " + str(e) + elif endpoint in ["routing/queues", "users"]: + page = 1 + self.logger.info( + "Restructuring the response in order to be able to insert it into a " + + "data frame.\n\tThis task could take a few minutes.\n" ) + while True: + if endpoint == "routing/queues": + params = {"pageSize": 500, "pageNumber": page} + elif endpoint == "users": + params = { + "pageSize": 500, + "pageNumber": page, + "expand": "presence,dateLastLogin,groups" + + ",employerInfo,lasttokenissued", + "state": "any", + } + response = self._api_call( + endpoint=endpoint, + post_data_list=post_data_list, + method="GET", + params=params, + ) - def schedule_report(self, data_to_post: Dict[str, Any]) -> int: - """ - POST method for report scheduling. - - Args: - data_to_post (Dict[str, Any]): Json format of POST body. + if response["entities"]: + df_response = pd.json_normalize( + response["entities"], + sep=normalization_sep, + ) + self.data_returned.update({page - 1: df_response}) - Returns: - int: status code - """ - payload = json.dumps(data_to_post) - new_report = handle_api_response( - url=f"https://api.{self.environment}/api/v2/analytics/reporting/schedules", - headers=self.authorization_token, - method="POST", - data=payload, - ) - if new_report.status_code == 200: - self.logger.info("Succesfully scheduled new report.") - else: - self.logger.error(f"Failed to scheduled new report. - {new_report.content}") - raise APIError("Failed to scheduled new report.") - return new_report.status_code + page += 1 + else: + break + elif endpoint == "routing_queues_members": + counter = 0 + if queues_ids is None: + self.logger.error( + "This endpoint requires `queues_ids` parameter to work." + ) + APIError("This endpoint requires `queues_ids` parameter to work.") + + for qid in queues_ids: + self.logger.info(f"Downloading Agents information from Queue: {qid}") + page = 1 + while True: + response = self._api_call( + endpoint=f"routing/queues/{qid}/members", + params={"pageSize": 100, "pageNumber": page}, + post_data_list=post_data_list, + method="GET", + ) + + if response["entities"]: + df_response = pd.json_normalize(response["entities"]) + # drop personal information + columns_to_drop = { + "user.addresses", + "user.primaryContactInfo", + "user.images", + }.intersection(df_response.columns) + df_response.drop( + columns_to_drop, + axis=1, + inplace=True, + ) + self.data_returned.update({counter: df_response}) + + page += 1 + counter += 1 + else: + break + + @add_viadot_metadata_columns def to_df( self, - report_url: str = None, - tests: dict = None, + if_empty: str = "warn", + **kwargs, ) -> pd.DataFrame: - """ - Download genesys data into a pandas DataFrame. + """Generate a pandas DataFrame from self.data_returned. Args: - report_url (str): Report url from api response. - tests (Dict[str], optional): A dictionary with optional list of tests - to verify the output dataframe. If defined, triggers the `validate` - function from utils. Defaults to None. + drop_duplicates (bool, optional): Remove duplicates from the DataFrame. + Defaults to False. 
+ validate_df_dict (Optional[Dict[str, Any]], optional): A dictionary with + optional list of tests to verify the output dataframe. Defaults to None. Returns: - pd.DataFrame: The DataFrame with time range. + pd.Dataframe: The response data as a pandas DataFrame plus viadot metadata. """ - if report_url is None: - report_url = self.get_analitics_url_report() - response_file = handle_api_response( - url=f"{report_url}", headers=self.authorization_token - ) - if self.report_columns is None: - df = pd.read_excel(response_file.content, header=6) - else: - df = pd.read_excel( - response_file.content, names=self.report_columns, skiprows=6 - ) - - if tests: - validate(df=df, tests=tests) - - return df + drop_duplicates = kwargs.get("drop_duplicates", False) + validate_df_dict = kwargs.get("validate_df_dict", None) + super().to_df(if_empty=if_empty) - def delete_scheduled_report_job(self, report_id: str) -> int: - """ - DELETE method for deleting particular report job. + for key in list(self.data_returned.keys()): + if key == 0: + data_frame = self.data_returned[key] + else: + data_frame = pd.concat([data_frame, self.data_returned[key]]) - Args: - report_id (str): Defined at the end of report url. + if drop_duplicates: + data_frame.drop_duplicates(inplace=True) - Returns: - int: Status code. - """ - delete_method = handle_api_response( - url=f"https://api.{self.environment}/api/v2/analytics/reporting/schedules/{report_id}", - headers=self.authorization_token, - method="DELETE", - ) - if delete_method.status_code == 200: - self.logger.info("Successfully deleted report from Genesys API.") + if validate_df_dict: + validate(df=data_frame, tests=validate_df_dict) - else: - self.logger.error( - f"Failed to deleted report from Genesys API. - {delete_method.content}" + if len(self.data_returned) == 0: + data_frame = pd.DataFrame() + self._handle_if_empty( + if_empty=if_empty, + message="The response does not contain any data.", ) - raise APIError("Failed to deleted report from Genesys API.") + else: + data_frame.reset_index(inplace=True, drop=True) - return delete_method.status_code + return data_frame diff --git a/src/viadot/sources/hubspot.py b/src/viadot/sources/hubspot.py new file mode 100644 index 000000000..915767e3e --- /dev/null +++ b/src/viadot/sources/hubspot.py @@ -0,0 +1,376 @@ +"""Hubspot API connector.""" + +from datetime import datetime +import json +import re +from typing import Any + +import pandas as pd +from pydantic import BaseModel + +from viadot.config import get_source_credentials +from viadot.exceptions import APIError, CredentialError +from viadot.sources.base import Source +from viadot.utils import add_viadot_metadata_columns, handle_api_response + + +class HubspotCredentials(BaseModel): + """Checking for values in Hubspot credentials dictionary. + + One key value is held in the Hubspot connector: + - token: The unique string characters to be identified. + + Args: + BaseModel (pydantic.main.ModelMetaclass): A base class for creating + Pydantic models. + """ + + token: str + + +class Hubspot(Source): + """A class that connects and extracts data from Hubspot API. + + Documentation is available here: + https://developers.hubspot.com/docs/api/crm/understanding-the-crm. + + Connector allows to pull data in two ways: + - using base API for crm schemas as an endpoint + (eg. "contacts", ""line_items", "deals", ...), + - using full url as endpoint. 
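A minimal sketch of both endpoint styles; the config key and endpoints are illustrative:

```python
# Illustrative sketch: pull contacts either by CRM object name or by full URL.
hubspot = Hubspot(config_key="hubspot")

# CRM object name as the endpoint:
hubspot.api_connection(endpoint="contacts", nrows=100)
# Alternatively, a full URL can be used:
# hubspot.api_connection(endpoint="https://api.hubapi.com/crm/v3/objects/deals", nrows=100)

df = hubspot.to_df()
```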
+ """ + + API_URL = "https://api.hubapi.com" + + def __init__( + self, + *args, + credentials: HubspotCredentials | None = None, + config_key: str = "hubspot", + **kwargs, + ): + """Create an instance of Hubspot. + + Args: + credentials (Optional[HubspotCredentials], optional): Hubspot credentials. + Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to "hubspot". + + Examples: + hubspot = Hubspot( + credentials=credentials, + config_key=config_key, + ) + hubspot.api_connection( + endpoint=endpoint, + filters=filters, + properties=properties, + nrows=nrows, + ) + data_frame = hubspot.to_df() + + Raises: + CredentialError: If credentials are not provided in local_config or + directly as a parameter. + """ + credentials = credentials or get_source_credentials(config_key) or None + if credentials is None: + msg = "Missing credentials." + raise CredentialError(msg) + self.credentials = credentials + + validated_creds = dict(HubspotCredentials(**credentials)) + super().__init__(*args, credentials=validated_creds, **kwargs) + + self.full_dataset = None + + def _date_to_unixtimestamp(self, date: str | None = None) -> int: + """Convert date from "yyyy-mm-dd" to Unix Timestamp. + + (SECONDS SINCE JAN 01 1970. (UTC)). For example: + 1680774921 SECONDS SINCE JAN 01 1970. (UTC) -> 11:55:49 AM 2023-04-06. + + Args: + date (Optional[str], optional): Input date in format "yyyy-mm-dd". + Defaults to None. + + Returns: + int: Number of seconds that passed since 1970-01-01 until "date". + """ + return int(datetime.timestamp(datetime.strptime(date, "%Y-%m-%d")) * 1000) + + def _get_api_url( + self, + endpoint: str | None = None, + filters: dict[str, Any] | None = None, + properties: list[Any] | None = None, + ) -> str: + """Generates full url for Hubspot API given filters and parameters. + + Args: + endpoint (Optional[str], optional): API endpoint for an individual request. + Defaults to None. + filters (Optional[Dict[str, Any]], optional): Filters defined for the API + body in specific order. Defaults to None. + properties (Optional[List[Any]], optional): List of user-defined columns to + be pulled from the API. Defaults to None. + + Returns: + str: The final URL API. + """ + if self.API_URL in endpoint: + url = endpoint + elif endpoint.startswith("hubdb"): + url = f"{self.API_URL}/{endpoint}" + else: + if filters: + url = f"{self.API_URL}/crm/v3/objects/{endpoint}/search/?limit=100&" + else: + url = f"{self.API_URL}/crm/v3/objects/{endpoint}/?limit=100&" + + if properties and len(properties) > 0: + url += f'properties={",".join(properties)}&' + + return url + + def _format_filters( + self, + filters: list[dict[str, Any]] | None, + ) -> list[dict[str, Any]]: + """API body (filters) conversion from a user defined to API language. + + Note: Right now only converts date to Unix Timestamp. + + Args: + filters (Optional[List[Dict[str, Any]]]): List of filters in JSON format. + + Returns: + List[Dict[str, Any]]: List of cleaned filters in JSON format. + """ + for item in filters: + for subitem in item["filters"]: + for key in list(subitem.keys()): + lookup = subitem[key] + regex = re.findall(r"\d+-\d+-\d+", lookup) + if regex: + regex = self._date_to_unixtimestamp(lookup) + subitem[key] = f"{regex}" + + return filters + + def _get_api_body(self, filters: list[dict[str, Any]]): + """Clean the filters body and converts to a JSON formatted value. + + Args: + filters (List[Dict[str, Any]]): Filters dictionary that will be passed to + Hubspot API. 
Defaults to {}. + + Example: + filters = { + "filters": [ + { + "propertyName": "createdate", + "operator": "BETWEEN", + "highValue": "2023-03-27", + "value": "2023-03-26" + } + ] + } + Operators between the min and max value are listed below: + [IN, NOT_HAS_PROPERTY, LT, EQ, GT, NOT_IN, GTE, CONTAINS_TOKEN, + HAS_PROPERTY, LTE, NOT_CONTAINS_TOKEN, BETWEEN, NEQ] + LT - Less than + LTE - Less than or equal to + GT - Greater than + GTE - Greater than or equal to + EQ - Equal to + NEQ - Not equal to + BETWEEN - Within the specified range. In your request, use key-value + pairs to set highValue and value. Refer to the example above. + IN - Included within the specified list. This operator is + case-sensitive, so inputted values must be in lowercase. + NOT_IN - Not included within the specified list + HAS_PROPERTY - Has a value for the specified property + NOT_HAS_PROPERTY - Doesn't have a value for the specified property + CONTAINS_TOKEN - Contains a token. In your request, you can use + wildcards (*) to complete a partial search. For example, use the + value *@hubspot.com to retrieve contacts with a HubSpot email + address. + NOT_CONTAINS_TOKEN -Doesn't contain a token + + Returns: + Dict: Filters with a JSON format. + """ + return json.dumps({"filterGroups": filters, "limit": 100}) + + def _api_call( + self, + url: str | None = None, + body: str | None = None, + method: str | None = None, + ) -> dict | None: + """General method to connect to Hubspot API and generate the response. + + Args: + url (Optional[str], optional): Hubspot API url. Defaults to None. + body (Optional[str], optional): Filters that will be pushed to the API body. + Defaults to None. + method (Optional[str], optional): Method of the API call. Defaults to None. + + Raises: + APIError: When the `status_code` is different to 200. + + Returns: + Dict: API response in JSON format. + """ + headers = { + "Authorization": f'Bearer {self.credentials["token"]}', + "Content-Type": "application/json", + } + + response = handle_api_response( + url=url, headers=headers, data=body, method=method + ) + + response_ok = 200 + if response.status_code == response_ok: + return response.json() + + self.logger.error(f"Failed to load response content. - {response.content}") + msg = "Failed to load all exports." + raise APIError(msg) + + def _get_offset_from_response( + self, api_response: dict[str, Any] + ) -> tuple[str] | None: + """Assign offset type/value depending on keys in API response. + + Args: + api_response (Dict[str, Any]): API response in JSON format. + + Returns: + tuple: Tuple in order: (offset_type, offset_value) + """ + if "paging" in api_response: + offset_type = "after" + offset_value = api_response["paging"]["next"][f"{offset_type}"] + + elif "offset" in api_response: + offset_type = "offset" + offset_value = api_response["offset"] + + else: + offset_type = None + offset_value = None + + return (offset_type, offset_value) + + def api_connection( + self, + endpoint: str | None = None, + filters: list[dict[str, Any]] | None = None, + properties: list[Any] | None = None, + nrows: int = 1000, + ) -> None: + """General method to connect to Hubspot API and generate the response. + + Args: + endpoint (Optional[str], optional): API endpoint for an individual request. + Defaults to None. + filters (Optional[List[Dict[str, Any]]], optional): Filters defined for the + API body in specific order. Defaults to None. 
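A quick, self-contained check of the offset detection described above, run against hand-made response shapes (cursor-style `paging` vs. legacy `offset`); the dictionaries are illustrative only:

```python
def get_offset_from_response(api_response: dict) -> tuple:
    # Same branching as `_get_offset_from_response` above.
    if "paging" in api_response:
        return "after", api_response["paging"]["next"]["after"]
    if "offset" in api_response:
        return "offset", api_response["offset"]
    return None, None


print(get_offset_from_response({"paging": {"next": {"after": "354"}}}))  # ('after', '354')
print(get_offset_from_response({"results": [], "offset": 20}))           # ('offset', 20)
print(get_offset_from_response({"results": []}))                         # (None, None)
```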
+ + Example: + filters=[ + { + "filters": [ + { + "propertyName": "createdate", + "operator": "BETWEEN", + "highValue": "1642636800000", + "value": "1641995200000", + }, + { + "propertyName": "email", + "operator": "CONTAINS_TOKEN", + "value": "*@xxxx.xx", + }, + ] + } + ], + properties (Optional[List[Any]], optional): List of user-defined columns to + be pulled from the API. Defaults to None. + nrows (int, optional): Max number of rows to pull during execution. + Defaults to 1000. + + Raises: + APIError: Failed to download data from the endpoint. + """ + url = self._get_api_url( + endpoint=endpoint, + filters=filters, + properties=properties, + ) + if filters: + filters_formatted = self._format_filters(filters) + body = self._get_api_body(filters=filters_formatted) + method = "POST" + partition = self._api_call(url=url, body=body, method=method) + self.full_dataset = partition["results"] + + while "paging" in partition and len(self.full_dataset) < nrows: + body = json.loads(self._get_api_body(filters=filters_formatted)) + body["after"] = partition["paging"]["next"]["after"] + partition = self._api_call( + url=url, body=json.dumps(body), method=method + ) + self.full_dataset.extend(partition["results"]) + + else: + method = "GET" + partition = self._api_call(url=url, method=method) + self.full_dataset = partition[next(iter(partition.keys()))] + + offset_type, offset_value = self._get_offset_from_response(partition) + + while offset_value and len(self.full_dataset) < nrows: + url = self._get_api_url( + endpoint=endpoint, + properties=properties, + filters=filters, + ) + url += f"{offset_type}={offset_value}" + + partition = self._api_call(url=url, method=method) + self.full_dataset.extend(partition[next(iter(partition.keys()))]) + + offset_type, offset_value = self._get_offset_from_response(partition) + + @add_viadot_metadata_columns + def to_df( + self, + if_empty: str = "warn", + ) -> pd.DataFrame: + """Generate a pandas DataFrame with the data in the Response and metadata. + + Args: + if_empty (str, optional): What to do if a fetch produce no data. + Defaults to "warn + + Returns: + pd.Dataframe: The response data as a pandas DataFrame plus viadot metadata. + """ + super().to_df(if_empty=if_empty) + + data_frame = pd.json_normalize(self.full_dataset) + + if data_frame.empty: + self._handle_if_empty( + if_empty=if_empty, + message="The response does not contain any data.", + ) + else: + self.logger.info("Successfully downloaded data from the Mindful API.") + + return data_frame diff --git a/src/viadot/sources/mindful.py b/src/viadot/sources/mindful.py new file mode 100644 index 000000000..54fe2889a --- /dev/null +++ b/src/viadot/sources/mindful.py @@ -0,0 +1,216 @@ +"""Mindful API connector.""" + +from datetime import date, timedelta +from io import StringIO +import json +from typing import Any, Literal + +import pandas as pd +from pydantic import BaseModel +from requests.auth import HTTPBasicAuth +from requests.models import Response + +from viadot.config import get_source_credentials +from viadot.exceptions import APIError, CredentialError +from viadot.sources.base import Source +from viadot.utils import add_viadot_metadata_columns, handle_api_response + + +class MindfulCredentials(BaseModel): + """Checking for values in Mindful credentials dictionary. + + Two key values are held in the Mindful connector: + - customer_uuid: The unique ID for the organization. + - auth_token: A unique token to be used as the password for API requests. 
+ + Args: + BaseModel (pydantic.main.ModelMetaclass): A base class for creating + Pydantic models. + """ + + customer_uuid: str + auth_token: str + + +class Mindful(Source): + """Class implementing the Mindful API. + + Documentation for this API is available at: https://apidocs.surveydynamix.com/. + """ + + ENDPOINTS = ("interactions", "responses", "surveys") + + def __init__( + self, + *args, + credentials: MindfulCredentials | None = None, + config_key: str = "mindful", + region: Literal["us1", "us2", "us3", "ca1", "eu1", "au1"] = "eu1", + **kwargs, + ): + """Create a Mindful instance. + + Args: + credentials (Optional[MindfulCredentials], optional): Mindful credentials. + Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to "mindful". + region (Literal[us1, us2, us3, ca1, eu1, au1], optional): Survey Dynamix + region from where to interact with the mindful API. Defaults to "eu1" + English (United Kingdom). + + Examples: + mindful = Mindful( + credentials=credentials, + config_key=config_key, + region=region, + ) + mindful.api_connection( + endpoint=endpoint, + date_interval=date_interval, + limit=limit, + ) + data_frame = mindful.to_df() + """ + credentials = credentials or get_source_credentials(config_key) or None + if credentials is None: + msg = "Missing credentials." + raise CredentialError(msg) + + validated_creds = dict(MindfulCredentials(**credentials)) + super().__init__(*args, credentials=validated_creds, **kwargs) + + self.auth = (credentials["customer_uuid"], credentials["auth_token"]) + if region != "us1": + self.region = f"{region}." + else: + self.region = "" + + def _mindful_api_response( + self, + params: dict[str, Any] | None = None, + endpoint: str = "", + ) -> Response: + """Call to Mindful API given an endpoint. + + Args: + params (Optional[Dict[str, Any]], optional): Parameters to be passed into + the request. Defaults to None. + endpoint (str, optional): API endpoint for an individual request. + Defaults to "". + + Returns: + Response: request object with the response from the Mindful API. + """ + return handle_api_response( + url=f"https://{self.region}surveydynamix.com/api/{endpoint}", + params=params, + method="GET", + auth=HTTPBasicAuth(*self.auth), + ) + + def api_connection( + self, + endpoint: Literal["interactions", "responses", "surveys"] = "surveys", + date_interval: list[date] | None = None, + limit: int = 1000, + ) -> None: + """General method to connect to Survey Dynamix API and generate the response. + + Args: + endpoint (Literal["interactions", "responses", "surveys"], optional): API + endpoint for an individual request. Defaults to "surveys". + date_interval (Optional[List[date]], optional): Date time range detailing + the starting date and the ending date. If no range is passed, one day of + data since this moment will be retrieved. Defaults to None. + limit (int, optional): The number of matching interactions to return. + Defaults to 1000. + + Raises: + ValueError: Not available endpoint. + APIError: Failed to download data from the endpoint. + """ + if endpoint not in self.ENDPOINTS: + raise ValueError( + f"Survey Dynamix endpoint: '{endpoint}'," + + " is not available through Mindful viadot connector." 
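A tiny sketch of how the `region` argument from `__init__` shapes the Survey Dynamix base URL used by `_mindful_api_response` ("us1" maps to the bare domain):

```python
def base_url(region: str, endpoint: str) -> str:
    # Mirrors the prefix logic in `__init__` plus the URL template in `_mindful_api_response`.
    prefix = f"{region}." if region != "us1" else ""
    return f"https://{prefix}surveydynamix.com/api/{endpoint}"


print(base_url("eu1", "surveys"))    # https://eu1.surveydynamix.com/api/surveys
print(base_url("us1", "responses"))  # https://surveydynamix.com/api/responses
```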
+ ) + + if ( + date_interval is None + or all(list(map(isinstance, date_interval, [date] * len(date_interval)))) + is False + ): + reference_date = date.today() + date_interval = [reference_date - timedelta(days=1), reference_date] + + self.logger.warning( + "No `date_interval` parameter was defined, or was erroneously " + + "defined. `date_interval` parameter must have the folloing " + + "structure:\n\t[`date_0`, `date_1`], having that `date_1` > " + + "`date_0`.\nBy default, one day of data, from " + + f"{date_interval[0].strftime('%Y-%m-%d')} to " + + f"{date_interval[1].strftime('%Y-%m-%d')}, will be obtained." + ) + + params = { + "_limit": limit, + "start_date": f"{date_interval[0]}", + "end_date": f"{date_interval[1]}", + } + + if endpoint == "surveys": + del params["start_date"] + del params["end_date"] + + response = self._mindful_api_response( + endpoint=endpoint, + params=params, + ) + response_ok = 200 + no_data_code = 204 + if response.status_code == response_ok: + self.logger.info( + f"Successfully downloaded '{endpoint}' data from mindful API." + ) + self.data = StringIO(response.content.decode("utf-8")) + elif response.status_code == no_data_code and not response.content.decode(): + self.logger.warning( + f"There are not '{endpoint}' data to download from" + + f" {date_interval[0]} to {date_interval[1]}." + ) + self.data = json.dumps({}) + else: + self.logger.error( + f"Failed to downloaded '{endpoint}' data. - {response.content}" + ) + msg = f"Failed to downloaded '{endpoint}' data." + raise APIError(msg) + + @add_viadot_metadata_columns + def to_df( + self, + if_empty: str = "warn", + ) -> pd.DataFrame: + """Download the data to a pandas DataFrame. + + Args: + if_empty (str, optional): What to do if a fetch produce no data. + Defaults to "warn + + Returns: + pd.Dataframe: The response data as a pandas DataFrame plus viadot metadata. + """ + super().to_df(if_empty=if_empty) + + data_frame = pd.read_json(self.data) + + if data_frame.empty: + self._handle_if_empty( + if_empty=if_empty, + message="The response does not contain any data.", + ) + else: + self.logger.info("Successfully downloaded data from the Mindful API.") + + return data_frame diff --git a/src/viadot/sources/minio.py b/src/viadot/sources/minio.py index 30676d1db..5ac75251e 100644 --- a/src/viadot/sources/minio.py +++ b/src/viadot/sources/minio.py @@ -1,14 +1,24 @@ +"""A module for interacting with MinIO.""" + +from collections.abc import Generator from pathlib import Path -from typing import Generator, Literal +from typing import Literal import pandas as pd import pyarrow as pa import pyarrow.parquet as pq -import s3fs -import urllib3 -from minio import Minio -from minio.error import S3Error + + +try: + from minio import Minio + from minio.error import S3Error + import s3fs +except ModuleNotFoundError as e: + msg = "Missing required modules to use MinIO source." + raise ImportError(msg) from e + from pydantic import BaseModel +import urllib3 from urllib3.exceptions import NewConnectionError from viadot.config import get_source_credentials @@ -25,22 +35,23 @@ class MinIOCredentials(BaseModel): class MinIO(Source): - """ - A class for interacting with MinIO, in a more Pythonic, user-friendly, and robust - way than the official minio client. - - Args: - credentials (MinIOCredentials): MinIO credentials. - config_key (str, optional): The key in the viadot config holding relevant credentials. 
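How the request parameters above come together for a one-day pull (the dates are made up); for the `surveys` endpoint the date bounds are dropped, matching the code above:

```python
from datetime import date, timedelta

endpoint = "responses"
limit = 1000
reference_date = date(2024, 8, 1)
date_interval = [reference_date - timedelta(days=1), reference_date]

params = {
    "_limit": limit,
    "start_date": f"{date_interval[0]}",
    "end_date": f"{date_interval[1]}",
}
if endpoint == "surveys":
    # Survey metadata is not filtered by date in the connector.
    del params["start_date"]
    del params["end_date"]

print(params)  # {'_limit': 1000, 'start_date': '2024-07-31', 'end_date': '2024-08-01'}
```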
- """ - def __init__( self, - credentials: MinIOCredentials = None, - config_key: str = None, + credentials: MinIOCredentials | None = None, + config_key: str | None = None, *args, **kwargs, ): + """A class for interacting with MinIO. + + Interact with MinIO in a more Pythonic, user-friendly, and robust way than the + official minio client. + + Args: + credentials (MinIOCredentials): MinIO credentials. + config_key (str, optional): The key in the viadot config holding relevant + credentials. + """ raw_creds = credentials or get_source_credentials(config_key) or {} validated_creds = MinIOCredentials(**raw_creds).dict( by_alias=True @@ -98,11 +109,11 @@ def from_arrow( schema_name: str | None = None, table_name: str | None = None, path: str | Path | None = None, + basename_template: str | None = None, partition_cols: list[str] | None = None, if_exists: Literal["error", "delete_matching", "overwrite_or_ignore"] = "error", - ): - """ - Create a Parquet dataset on MinIO from a PyArrow Table. + ) -> None: + """Create a Parquet dataset on MinIO from a PyArrow Table. Uses multi-part upload to upload the table in chunks, speeding up the process by using multithreading and avoiding upload size limits. @@ -114,7 +125,7 @@ def from_arrow( `s3://///.parquet`. For more information on partitioning, see - https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_to_dataset.html#pyarrow-parquet-write-to-dataset # noqa + https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_to_dataset.html#pyarrow-parquet-write-to-dataset Args: table (pa.Table): The table to upload. @@ -124,18 +135,20 @@ def from_arrow( None. path (str | Path, optional): The path to the destination file. Defaults to None. + basename_template (str, optional): A template string used to generate + basenames of written data files. The token '{i}' will be replaced with + an automatically incremented integer. Defaults to None. partition_cols (list[str], optional): The columns to partition by. Defaults to None. if_exists (Literal["error", "delete_matching", "overwrite_or_ignore"], - optional). What to do if the dataset already exists. + optional): What to do if the dataset already exists. """ fqn_or_path = (schema_name and table_name) or ( path and not (schema_name or table_name) ) if not fqn_or_path: - raise ValueError( - "Either both `schema_name` and `table_name` or only `path` must be provided." - ) + msg = "Either both `schema_name` and `table_name` or only `path` must be provided." + raise ValueError(msg) # We need to create the dirs here as PyArrow also tries to create the bucket, # which shouldn't be allowed for whomever is executing this code. @@ -150,6 +163,7 @@ def from_arrow( root_path=path, partition_cols=partition_cols, existing_data_behavior=if_exists, + basename_template=basename_template, filesystem=self.fs, max_rows_per_file=1024 * 1024, create_dir=False, # Required as Arrow attempts to create the bucket, too. @@ -161,11 +175,11 @@ def from_df( schema_name: str | None = None, table_name: str | None = None, path: str | Path | None = None, + basename_template: str | None = None, partition_cols: list[str] | None = None, if_exists: Literal["error", "delete_matching", "overwrite_or_ignore"] = "error", ) -> None: - """ - Create a Parquet dataset on MinIO from a PyArrow Table. + """Create a Parquet dataset on MinIO from a PyArrow Table. Uses multi-part upload to upload the table in chunks, speeding up the process by using multithreading and avoiding upload size limits. 
@@ -177,7 +191,7 @@ def from_df( `s3://///.parquet`. For more information on partitioning, see - https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_to_dataset.html#pyarrow-parquet-write-to-dataset # noqa + https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_to_dataset.html#pyarrow-parquet-write-to-dataset Args: df (pd.DataFrame): The DataFrame to upload. @@ -187,10 +201,13 @@ def from_df( None. path (str | Path, optional): The path to the destination file. Defaults to None. + basename_template (str, optional): A template string used to generate + basenames of written data files. The token '{i}' will be replaced with + an automatically incremented integer. Defaults to None. partition_cols (list[str], optional): The columns to partition by. Defaults to None. if_exists (Literal["error", "delete_matching", "overwrite_or_ignore"], - optional). What to do if the dataset already exists. + optional): What to do if the dataset already exists. """ table = pa.Table.from_pandas(df) @@ -199,13 +216,13 @@ def from_df( schema_name=schema_name, table_name=table_name, path=path, + basename_template=basename_template, partition_cols=partition_cols, if_exists=if_exists, ) def ls(self, path: str) -> Generator[str, None, None]: - """ - List files and directories under `path`. + """List files and directories under `path`. List operation can be slow if there are a lot of objects, hence using a generator. @@ -220,8 +237,7 @@ def ls(self, path: str) -> Generator[str, None, None]: yield obj.object_name def rm(self, path: str, recursive: bool = False) -> None: - """ - Remove a file or directory from MinIO. + """Remove a file or directory from MinIO. Args: path (str): The path to the file to remove. @@ -239,24 +255,23 @@ def rm(self, path: str, recursive: bool = False) -> None: def _check_if_file_exists(self, path: str) -> bool: try: self.client.stat_object(self.bucket, path) - return True except S3Error as e: if "Object does not exist" in e.message: return False - else: - raise e + raise + else: + return True def check_connection(self) -> None: """Verify connectivity to the MinIO endpoint.""" try: self.client.bucket_exists(self.bucket) except NewConnectionError as e: - raise ValueError( - f"Connection to MinIO endpoint '{self.endpoint}' failed with error: \n{e}", - "Please check your credentials and try again.", - ) + msg = f"Connection to MinIO endpoint '{self.endpoint}' failed with error: \n{e}" + msg += "Please check your credentials and try again." 
+ + raise ValueError(msg) from e except Exception as e: - raise ValueError( - f"Connection to MinIO endpoint '{self.endpoint}' failed with error: \n{e}" - ) + msg = f"Connection to MinIO endpoint '{self.endpoint}' failed with error: \n{e}" + raise ValueError(msg) from e self.logger.info("Connection successful!") diff --git a/src/viadot/sources/outlook.py b/src/viadot/sources/outlook.py new file mode 100644 index 000000000..5df9737f3 --- /dev/null +++ b/src/viadot/sources/outlook.py @@ -0,0 +1,355 @@ +"""Module for fetching data from the Outlook API.""" + +from datetime import date, datetime, timedelta, timezone +from typing import Any + +from O365 import Account +from O365.mailbox import MailBox +import pandas as pd +from pydantic import BaseModel + +from viadot.config import get_source_credentials +from viadot.exceptions import CredentialError +from viadot.sources.base import Source +from viadot.utils import add_viadot_metadata_columns + + +class OutlookCredentials(BaseModel): + """Checking for values in Outlook credentials dictionary. + + Two key values are held in the Outlook connector: + - client_id: + - client_secret: + - tenant_id: + + Args: + BaseModel (pydantic.main.ModelMetaclass): A base class for creating + Pydantic models. + """ + + client_id: str + client_secret: str + tenant_id: str + + +class Outlook(Source): + """Class implementing the Outlook API. + + Documentation for this API is available at: + https://o365.github.io/python-o365/latest/getting_started.html. + """ + + UTC = timezone.utc + + def __init__( + self, + *args: list[Any], + credentials: dict[str, Any] | None = None, + config_key: str = "outlook", + **kwargs: dict[str, Any], + ): + """Outlook connector build for fetching Outlook API source. + + Data are fetched from start to end date range. If start or end date are not + provided then flow fetched data from yesterday by default. + + Args: + credentials (Optional[OutlookCredentials], optional): Outlook credentials. + Defaults to None + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to "outlook". + + Examples: + outlook = Outlook( + config_key=config_key, + ) + outlook.api_connection( + mailbox_name=mailbox_name, + request_retries=request_retries, + start_date=start_date, + end_date=end_date, + limit=limit, + address_limit=address_limit, + outbox_list=outbox_list, + ) + data_frame = outlook.to_df() + + Raises: + CredentialError: If credentials are not provided in local_config or + directly as a parameter. + """ + credentials = credentials or get_source_credentials(config_key) or None + + if credentials is None or not isinstance(credentials, dict): + msg = "Missing credentials." + raise CredentialError(msg) + self.credentials = dict(OutlookCredentials(**credentials)) + + super().__init__(*args, credentials=self.credentials, **kwargs) + + @staticmethod + def _get_subfolders( + folder_structure: dict, + folder: MailBox, + key_concat: str = "", + ) -> dict[str, list] | None: + """Retrieve all the subfolder in a MailBox folder. + + Args: + folder_structure (dict): Dictionary where to save the data. + folder (MailBox): The MailBox folder from where to extract the subfolders. + key_concat (str, optional) Previous Mailbox folder structure to add to + the actual subfolder. Defaults to "". + + Returns: + Dict[str, List]: `folder_structure` dictionary is returned once + it is updated. 
+ """ + if key_concat: + tmp_key = key_concat.split("|") + key_concat = key_concat.replace(f"|{tmp_key[-1]}", "") + + for subfolder in folder.get_folders(): + if subfolder: + folder_structure.update( + { + "|".join([key_concat, folder.name, subfolder.name]).lstrip( + "|" + ): subfolder + } + ) + + if folder_structure: + return folder_structure + + return None + + def _get_all_folders( + self, + mailbox: MailBox, + ) -> dict: + """To retrieve all folders from a Mailbox object. + + Args: + mailbox (MailBox): Outlook Mailbox object from where to extract all + folder structure. + + Returns: + dict: Every single folder and subfolder is returned as + "parent (sub)folder|(sub)folder": Mailbox. + """ + dict_folders = self._get_subfolders({}, mailbox) + final_dict_folders = dict_folders.copy() + + # Get all subfolders. + while_dict_folders = {"key": "value"} + while len(while_dict_folders) != 0: + while_dict_folders = {} + for key, value in list(dict_folders.items()): + tmp_dict_folders = self._get_subfolders({}, value, key_concat=key) + if tmp_dict_folders: + final_dict_folders.update(tmp_dict_folders) + while_dict_folders.update(tmp_dict_folders) + + dict_folders = while_dict_folders.copy() + + return final_dict_folders + + # TODO: should be refactored. + def _get_messages_from_mailbox( # noqa: C901, PLR0912 + self, + mailbox_name: str, + dict_folder: dict, + date_range_start_time: datetime, + date_range_end_time: datetime, + limit: int = 10000, + address_limit: int = 8000, + outbox_list: list[str] | None = None, + ) -> list: + """To retrieve all messages from all the mailboxes passed in the dictionary. + + Args: + mailbox_name (str): Mailbox name. + dict_folder (dict): Mailboxes dictionary holder, with the following + structure: "parent (sub)folder|(sub)folder": Mailbox. + date_range_start_time (datetime): Start date from where to stract data. + date_range_end_time (datetime): End data up to where to stract data. + limit (int, optional): Number of fetched top messages. Defaults to 10000. + address_limit (int, optional): The maximum number of accepted characters in + the sum of all email names. Defaults to 8000. + outbox_list (List[str], optional): List of outbox folders to differenciate + between Inboxes and Outboxes. Defaults to ["Sent Items"]. + + Returns: + list: A list with all messages from all Mailboxes. 
+ """ + if not outbox_list: + outbox_list = ["Sent Items"] + + data = [] + for key, value in list(dict_folder.items()): + count = 0 + for message in value.get_messages(limit=limit): + received_time = message.received + date_obj = datetime.fromisoformat(str(received_time)) + if ( + date_range_start_time.replace(tzinfo=self.UTC) + < date_obj + < date_range_end_time.replace(tzinfo=self.UTC) + ): + count += 1 + fetched = message.to_api_data() + sender_mail = fetched.get("from", None) + if sender_mail is not None: + sender_mail = fetched["from"]["emailAddress"]["address"] + recivers_list = fetched.get("toRecipients") + recivers = " " + + if recivers_list is not None: + for reciver in recivers_list: + add_string = f", {reciver['emailAddress']['address']}" + if ( + sum(list(map(len, [recivers, add_string]))) + >= address_limit + ): + break + + recivers += add_string + + categories = " " + if message.categories is not None: + categories = ", ".join( + categories for categories in message.categories + ) + + conversation_index = " " + if message.conversation_index is not None: + conversation_index = message.conversation_index + + if isinstance(message.subject, str): + subject = message.subject.replace("\t", " ") + else: + subject = message.subject + + row = { + "(sub)folder": value.name, + "conversation ID": fetched.get("conversationId"), + "conversation index": conversation_index, + "categories": categories, + "sender": sender_mail, + "subject": subject, + "recivers": recivers.strip(", "), + "received_time": fetched.get("receivedDateTime"), + "mail_adress": mailbox_name.split("@")[0] + .replace(".", "_") + .replace("-", "_"), + } + if any(x.lower() in key.lower() for x in outbox_list): + row["Inbox"] = False + else: + row["Inbox"] = True + + data.append(row) + + if count > 0: + self.logger.info(f"folder: {key.ljust(76, '-')} messages: {count}") + + return data + + def api_connection( + self, + mailbox_name: str | None = None, + request_retries: int = 10, + start_date: str | None = None, + end_date: str | None = None, + limit: int = 10000, + address_limit: int = 8000, + outbox_list: list[str] | None = None, + ) -> pd.DataFrame: + """Download all the messages stored in a MailBox folder and subfolders. + + Args: + mailbox_name (Optional[str], optional): Mailbox name. Defaults to None. + request_retries (int, optional): How many times retries to authorizate. + Defaults to 10. + start_date (Optional[str], optional): A filtering start date parameter e.g. + "2022-01-01". Defaults to None. + end_date (Optional[str], optional): A filtering end date parameter e.g. + "2022-01-02". Defaults to None. + limit (int, optional): Number of fetched top messages. Defaults to 10000. + address_limit (int, optional): The maximum number of accepted characters in + the sum of all email names. Defaults to 8000. + outbox_list (List[str], optional): List of outbox folders to differentiate + between Inboxes and Outboxes. Defaults to ["Sent Items"]. + + Returns: + pd.DataFrame: All messages are stored in a pandas framework. + """ + if not outbox_list: + outbox_list = ["Sent Items"] + account = Account( + (self.credentials["client_id"], self.credentials["client_secret"]), + auth_flow_type="credentials", + tenant_id=self.credentials["tenant_id"], + main_resource=mailbox_name, + request_retries=request_retries, + ) + + if account.authenticate(): + self.logger.info(f"{mailbox_name} Authenticated!") + else: + msg = "Failed to authenticate." 
+ raise ValueError(msg) + + mailbox_obj = account.mailbox() + + if start_date is not None and end_date is not None: + date_range_end_time = datetime.strptime(end_date, "%Y-%m-%d") + date_range_start_time = datetime.strptime(start_date, "%Y-%m-%d") + else: + date_range_start_time = date.today() - timedelta(days=1) + date_range_end_time = date.today() + + min_time = datetime.min.time() + date_range_end_time = datetime.combine(date_range_end_time, min_time) + date_range_start_time = datetime.combine(date_range_start_time, min_time) + + final_dict_folders = self._get_all_folders(mailbox_obj) + + self.data = self._get_messages_from_mailbox( + mailbox_name=mailbox_name, + dict_folder=final_dict_folders, + date_range_start_time=date_range_start_time, + date_range_end_time=date_range_end_time, + limit=limit, + address_limit=address_limit, + outbox_list=outbox_list, + ) + + @add_viadot_metadata_columns + def to_df( + self, + if_empty: str = "warn", + ) -> pd.DataFrame: + """Generate a pandas DataFrame with the data. + + Args: + if_empty (str, optional): What to do if a fetch produce no data. + Defaults to "warn + + Returns: + pd.Dataframe: The response data as a pandas DataFrame plus viadot metadata. + """ + super().to_df(if_empty=if_empty) + + data_frame = pd.DataFrame(self.data) + + if data_frame.empty: + self._handle_if_empty( + if_empty="warn", + message="No data was got from the Mail Box for those days", + ) + else: + self.logger.info("Successfully downloaded data from the Mindful API.") + + return data_frame diff --git a/src/viadot/sources/redshift_spectrum.py b/src/viadot/sources/redshift_spectrum.py index 78302b3f4..bc89527bc 100644 --- a/src/viadot/sources/redshift_spectrum.py +++ b/src/viadot/sources/redshift_spectrum.py @@ -1,10 +1,21 @@ +"""Amazon Redshift Spectrum connector.""" + import os -from typing import List, Literal, Optional, Tuple +from typing import Literal -import awswrangler as wr -import boto3 import pandas as pd -import redshift_connector + + +try: + import awswrangler as wr + import boto3 + import redshift_connector +except ModuleNotFoundError as e: + msg = "Missing required modules to use RedshiftSpectrum source." + raise ImportError(msg) from e + +from typing import Any + from pydantic import BaseModel, root_validator from viadot.config import get_source_credentials @@ -16,7 +27,7 @@ class RedshiftSpectrumCredentials(BaseModel): region_name: str # The name of the AWS region. aws_access_key_id: str # The AWS access key ID. aws_secret_access_key: str # The AWS secret access key. - profile_name: str = None # The name of the IAM profile to use. + profile_name: str | None = None # The name of the IAM profile to use. # Below credentials are required only by some methods. # @@ -28,11 +39,12 @@ class RedshiftSpectrumCredentials(BaseModel): # password: Optional[str] # engine: Optional[str] = "redshift" # dbname: Optional[str] - credentials_secret: Optional[str] - iam_role: Optional[str] # The IAM role to assume. Used by `create_schema()`. + credentials_secret: str | None + iam_role: str | None # The IAM role to assume. Used by `create_schema()`. 
@root_validator(pre=True) - def is_configured(cls, credentials): + def is_configured(cls, credentials: dict[str, Any]) -> dict[str, Any]: # noqa: N805 + """Validate the credentials configuration.""" profile_name = credentials.get("profile_name") region_name = credentials.get("region_name") aws_access_key_id = credentials.get("aws_access_key_id") @@ -42,44 +54,40 @@ def is_configured(cls, credentials): direct_credential = aws_access_key_id and aws_secret_access_key and region_name if not (profile_credential or direct_credential): - raise CredentialError( - "Either `profile_name` and `region_name`, or `aws_access_key_id`, " - "`aws_secret_access_key`, and `region_name` must be specified." - ) + msg = "Either `profile_name` and `region_name`, or `aws_access_key_id`," + msg += " `aws_secret_access_key`, and `region_name` must be specified." + raise CredentialError(msg) return credentials class RedshiftSpectrum(Source): - """ - A class for pulling data from and uploading to a specified Amazon Redshift Spectrum - external schema. + def __init__( + self, + credentials: RedshiftSpectrumCredentials | None = None, + config_key: str | None = None, + *args, + **kwargs, + ): + """A class for working with Amazon Redshift Spectrum. - Note that internally, AWS SDK refers to schemas as "databases", as external schemas - correspond to AWS Glue databases. However, to keep consistent naming with all other - viadot sources, we use the word "schema" instead. + Note that internally, AWS SDK refers to schemas as "databases", as external + schemas correspond to AWS Glue databases. However, to keep consistent naming + with all other viadot sources, we use the word "schema" instead. - Args: + Args: credentials (RedshiftSpectrumCredentials, optional): RedshiftSpectrumCredentials credentials. Defaults to None. config_key (str, optional): The key in the viadot config holding relevant credentials. Defaults to None. - Examples: + Examples: ```python from viadot.sources import RedshiftSpectrum with RedshiftSpectrum(config_key="redshift_spectrum") as redshift: redshift.get_schemas() ``` - """ - - def __init__( - self, - credentials: RedshiftSpectrumCredentials = None, - config_key: str = None, - *args, - **kwargs, - ): + """ raw_creds = ( credentials or get_source_credentials(config_key) @@ -99,18 +107,17 @@ def __init__( self._session = None self._con = None - def __enter__(self): + def __enter__(self): # noqa: D105 return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, traceback): # noqa: D105, ANN001 if self._con: self._con.close() self._con = None @property def session(self) -> boto3.session.Session: - """ - A singleton-like property for initiating an AWS session with boto3. + """A singleton-like property for initiating an AWS session with boto3. Note that this is not an actual session, so it does not need to be closed. """ @@ -125,6 +132,7 @@ def session(self) -> boto3.session.Session: @property def con(self) -> redshift_connector.Connection: + """A singleton-like property for establishing a connection.""" if not self._con: if self.credentials.get("credentials_secret"): self._con = wr.redshift.connect( @@ -133,18 +141,16 @@ def con(self) -> redshift_connector.Connection: secret_id=self.credentials.get("credentials_secret"), ) else: - raise ValueError( - "`credentials_secret` config is required to connect to Redshift." - ) + msg = "The `credentials_secret` config is required to connect to Redshift." 
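The acceptance rule applied by `is_configured` above, pulled out as a plain function so the two valid credential shapes are easy to see (all values are fake):

```python
def is_configured(creds: dict) -> bool:
    profile_credential = bool(creds.get("profile_name") and creds.get("region_name"))
    direct_credential = bool(
        creds.get("aws_access_key_id")
        and creds.get("aws_secret_access_key")
        and creds.get("region_name")
    )
    return profile_credential or direct_credential


print(is_configured({"profile_name": "dev", "region_name": "eu-west-1"}))  # True
print(is_configured({
    "aws_access_key_id": "AKIA...",
    "aws_secret_access_key": "...",
    "region_name": "eu-west-1",
}))  # True
print(is_configured({"region_name": "eu-west-1"}))  # False -> CredentialError in the validator
```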
+ raise ValueError(msg) return self._con def _get_env_credentials(self): - credentials = { + return { "region_name": os.environ.get("AWS_DEFAULT_REGION"), "aws_access_key_id": os.environ.get("AWS_ACCESS_KEY_ID"), "aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESS_KEY"), } - return credentials def from_df( self, @@ -154,41 +160,42 @@ def from_df( table: str, extension: Literal[".parquet", ".csv"] = ".parquet", if_exists: Literal["overwrite", "overwrite_partitions", "append"] = "overwrite", - partition_cols: List[str] = None, + partition_cols: list[str] | None = None, sep: str = ",", - description: str = None, + description: str | None = None, **kwargs, ) -> None: - """ - Upload a pandas `DataFrame` into a CSV or Parquet file in a specified external - Amazon Redshift Spectrum schema. + """Upload a pandas `DataFrame` into a CSV or Parquet file. - For a full list of available parameters, please refer to the official documentation: + For a full list of available parameters, please refer to the official + documentation: https://aws-sdk-pandas.readthedocs.io/en/3.0.0/stubs/awswrangler.s3.to_parquet.html https://aws-sdk-pandas.readthedocs.io/en/3.0.0/stubs/awswrangler.s3.to_csv.html Args: df (pd.DataFrame): Pandas `DataFrame`. - to_path (str): Path to Amazon S3 folder where the table will be located. If needed, - a bottom-level directory named f"{table}" is automatically created, so - that files are always located in a folder named the same as the table. + to_path (str): Path to Amazon S3 folder where the table will be located. If + needed, a bottom-level directory named f"{table}" is automatically + created, so that files are always located in a folder named the same as + the table. schema (str): The name of the schema. table (str): The name of the table to load the data into. extension (Literal[".parquet", ".csv"], optional): Required file type. Defaults to '.parquet'. - if_exists (Literal["overwrite", "overwrite_partitions", "append"], optional): - 'overwrite' to recreate the table, 'overwrite_partitions' to only recreate - the partitions, 'append' to append the data. Defaults to 'overwrite'. - partition_cols (List[str], optional): List of column names that will be used to - create partitions. Only takes effect if dataset=True. Defaults to None. + if_exists (Literal["overwrite", "overwrite_partitions", "append"], optional + ): 'overwrite' to recreate the table, 'overwrite_partitions' to only + recreate the partitions, 'append' to append the data. Defaults to + 'overwrite'. + partition_cols (List[str], optional): List of column names that will be used + to create partitions. Only takes effect if dataset=True. Defaults to + None. sep (str, optional): Field delimiter for the output file. Defaults to ','. description (str, optional): Amazon Redshift Spectrum table description. Defaults to None. """ - # Ensure files are in a directory named {table}. if not to_path.rstrip("/").endswith(table): - to_path = os.path.join(to_path, table) + to_path = to_path.rstrip("/") + "/" + table if extension == ".parquet": wr.s3.to_parquet( @@ -215,7 +222,8 @@ def from_df( **kwargs, ) else: - raise ValueError("Only CSV and parquet formats are supported.") + msg = "Only CSV and parquet formats are supported." + raise ValueError(msg) def to_df( self, @@ -223,34 +231,32 @@ def to_df( table: str, **kwargs, ) -> pd.DataFrame: - """ - Reads a table from an Amazon Redshift Spectrum external schema into a pandas `DataFrame`. 
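A hedged usage sketch of `RedshiftSpectrum.from_df`; the bucket, schema, and table names are made up, and working AWS credentials plus an existing external schema are assumed:

```python
import pandas as pd

from viadot.sources.redshift_spectrum import RedshiftSpectrum

df = pd.DataFrame({"country": ["PL", "DE"], "sales": [10, 20]})

with RedshiftSpectrum(config_key="redshift_spectrum") as redshift:
    redshift.from_df(
        df,
        to_path="s3://my-bucket/spectrum",  # '/sales' is appended automatically
        schema="sales_schema",
        table="sales",
        extension=".parquet",
        if_exists="overwrite",
        partition_cols=["country"],
        description="Example table created from a DataFrame.",
    )
```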
- For a full list of available parameters, please refer to the official documentation: + """Read a table from an external schema into a pandas `DataFrame`. + + For a full list of available parameters, please refer to the official + documentation: https://aws-sdk-pandas.readthedocs.io/en/3.0.0/stubs/awswrangler.s3.read_parquet_table.html Args: schema (str): The name of the schema. table (str): The name of the table to load. """ - - df = wr.s3.read_parquet_table( + return wr.s3.read_parquet_table( boto3_session=self.session, database=schema, table=table, **kwargs, ) - return df - def drop_table( self, schema: str, table: str, remove_files: bool = True, ) -> None: - """ - Drops a table from a specified Amazon Redshift Spectrum external schema, - including related files from Amazon S3, if specified. + """Drop a table from a specified external schema. + + Drops a table, including related files from Amazon S3, if specified. Args: schema (str): The name of the schema. @@ -273,21 +279,19 @@ def drop_table( def get_tables( self, schema: str, - ) -> List[str]: - """ - Returns a list of tables in a specified schema. + ) -> list[str]: + """Returns a list of tables in a specified schema. Args: schema (str): The name of the schema. """ - get_tables_query = f"SELECT t.tablename FROM SVV_EXTERNAL_TABLES t WHERE t.schemaname = '{schema}'" + get_tables_query = f"SELECT t.tablename FROM SVV_EXTERNAL_TABLES t WHERE t.schemaname = '{schema}'" # noqa: S608 with self.con.cursor() as cursor: tables_info = cursor.execute(get_tables_query).fetchall() return [table_info[0] for table_info in tables_info] def _check_if_table_exists(self, schema: str, table: str) -> bool: - """ - Check if a table exists in a specified Amazon Redshift Spectrum external schema. + """Check if a table exists in a specified Redshift Spectrum external schema. Args: schema (str): The name of the schema. @@ -301,10 +305,9 @@ def _check_if_table_exists(self, schema: str, table: str) -> bool: def create_schema( self, schema: str, - description: Optional[str] = None, + description: str | None = None, ) -> None: - """ - Create an external schema in Amazon Redshift Spectrum. + """Create an external schema in Amazon Redshift Spectrum. This involves two steps: - creating a Glue database @@ -312,7 +315,8 @@ def create_schema( Args: schema (str): The name of the schema. - description (str, optional): The description of the schema. Defaults to None. + description (str, optional): The description of the schema. Defaults to + None. """ self._create_glue_database( database=schema, description=description, exist_ok=True @@ -331,7 +335,7 @@ def create_schema( def _create_glue_database( self, database: str, - description: Optional[str] = None, + description: str | None = None, exist_ok: bool = False, ): """Create an AWS Glue database. @@ -352,23 +356,22 @@ def _create_glue_database( def get_schemas(self) -> list[str]: """Returns a list of schemas in the current Redshift Spectrum database.""" - # External Redshift schemas get_schemas_query = "SELECT schemaname FROM SVV_EXTERNAL_SCHEMAS" with self.con.cursor() as cursor: - schema_names: Tuple[list] = cursor.execute(get_schemas_query).fetchall() + schema_names: tuple[list] = cursor.execute(get_schemas_query).fetchall() external_schemas = [schema_name[0] for schema_name in schema_names] # Glue databases. 
schema_infos = wr.catalog.get_databases(boto3_session=self.session) glue_schemas = [schema_info["Name"] for schema_info in schema_infos] - # An external Redshift schema is a Spectrum schema only if it's also a Glue database. + # An external Redshift schema is a Spectrum schema only if it's also a Glue + # database. return [schema for schema in external_schemas if schema in glue_schemas] def _check_if_schema_exists(self, schema: str) -> bool: - """ - Check if a schema exists in Amazon Redshift Spectrum. + """Check if a schema exists in Amazon Redshift Spectrum. Args: schema (str): The name of the schema. @@ -379,8 +382,7 @@ def _check_if_schema_exists(self, schema: str) -> bool: return schema in self.get_schemas() def _is_spectrum_schema(self, schema: str) -> bool: - """ - Check if a Redshift schema is a Spectrum schema. + """Check if a Redshift schema is a Spectrum schema. Args: schema (str): The name of the schema. @@ -391,15 +393,14 @@ def _is_spectrum_schema(self, schema: str) -> bool: return self._check_if_schema_exists(schema) def drop_schema(self, schema: str, drop_glue_database: bool = False) -> None: - """ - Drop a Spectrum schema. If specified, also drop the underlying Glue database. + """Drop a Spectrum schema. If specified, also drop the underlying Glue database. Args: schema (str): The name of the schema. """ - if not self._is_spectrum_schema(schema): - raise ValueError(f"Schema {schema} is not a Spectrum schema.") + msg = f"Schema {schema} is not a Spectrum schema." + raise ValueError(msg) drop_external_schema_query = f"DROP SCHEMA IF EXISTS {schema}" with self.con.cursor() as cursor: diff --git a/src/viadot/sources/s3.py b/src/viadot/sources/s3.py index 7baf85833..a46f8dad4 100644 --- a/src/viadot/sources/s3.py +++ b/src/viadot/sources/s3.py @@ -1,10 +1,20 @@ +"""A module for working with Amazon S3 as a data source.""" + +from collections.abc import Iterable import os -from typing import Iterable, List, Literal, Union +from pathlib import Path +from typing import Literal + + +try: + import awswrangler as wr + import boto3 + import s3fs +except ModuleNotFoundError: + msg = "Missing required modules to use RedshiftSpectrum source." + raise ImportError(msg) from None -import awswrangler as wr -import boto3 import pandas as pd -import s3fs from pydantic import BaseModel, root_validator from viadot.config import get_source_credentials @@ -16,10 +26,17 @@ class S3Credentials(BaseModel): region_name: str # The name of the AWS region. aws_access_key_id: str # The AWS access key ID. aws_secret_access_key: str # The AWS secret access key. - profile_name: str = None # The name of the IAM profile to use. + profile_name: str | None = None # The name of the IAM profile to use. @root_validator(pre=True) - def is_configured(cls, credentials): + def is_configured(cls, credentials: dict) -> dict: # noqa: N805 + """Validate credentials. + + Ensure that at least one of the + following is provided: + - profile_name and region_name + - aws_access_key_id, aws_secret_access_key, and region_name + """ profile_name = credentials.get("profile_name") region_name = credentials.get("region_name") aws_access_key_id = credentials.get("aws_access_key_id") @@ -29,31 +46,28 @@ def is_configured(cls, credentials): direct_credential = aws_access_key_id and aws_secret_access_key and region_name if not (profile_credential or direct_credential): - raise CredentialError( - "Either `profile_name` and `region_name`, or `aws_access_key_id`, " - "`aws_secret_access_key`, and `region_name` must be specified." 
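The optional-dependency guard used for the AWS imports here (and for MinIO and SAP RFC elsewhere in this patch), sketched with a hypothetical module name so it can be run anywhere: a bare `ModuleNotFoundError` at import time is converted into an `ImportError` whose message names the source:

```python
try:
    try:
        import a_hypothetical_backend  # stand-in for awswrangler / boto3 / s3fs
    except ModuleNotFoundError as e:
        msg = "Missing required modules to use ExampleSource."
        raise ImportError(msg) from e
except ImportError as err:
    print(err)  # -> Missing required modules to use ExampleSource.
```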
- ) + msg = "Either `profile_name` and `region_name`, or `aws_access_key_id`, " + msg += "`aws_secret_access_key`, and `region_name` must be specified." + raise CredentialError(msg) return credentials class S3(Source): - """ - A class for pulling data from and uploading to the Amazon S3. - - Args: - credentials (S3Credentials, optional): Amazon S3 credentials. - Defaults to None. - config_key (str, optional): The key in the viadot config holding relevant - credentials. Defaults to None. - """ - def __init__( self, - credentials: S3Credentials = None, - config_key: str = None, + credentials: S3Credentials | None = None, + config_key: str | None = None, *args, **kwargs, ): + """A class for pulling data from and uploading to the Amazon S3. + + Args: + credentials (S3Credentials, optional): Amazon S3 credentials. + Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + """ raw_creds = ( credentials or get_source_credentials(config_key) @@ -90,28 +104,24 @@ def session(self) -> boto3.session.Session: return self._session def _get_env_credentials(self): - credentials = { + return { "region_name": os.environ.get("AWS_DEFAULT_REGION"), "aws_access_key_id": os.environ.get("AWS_ACCESS_KEY_ID"), "aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESS_KEY"), } - return credentials - def ls(self, path: str, suffix: str = None) -> List[str]: - """ - Returns a list of objects in a provided path. + def ls(self, path: str, suffix: str | None = None) -> list[str]: + """Returns a list of objects in a provided path. Args: path (str): Path to a folder. - suffix (Union[str, List[str], None]) - Suffix or List of suffixes for + suffix (Union[str, List[str], None]): Suffix or list of suffixes for filtering Amazon S3 keys. Defaults to None. """ - return wr.s3.list_objects(boto3_session=self.session, path=path, suffix=suffix) def exists(self, path: str) -> bool: - """ - Check if an object exists in the Amazon S3. + """Check if an object exists in the Amazon S3. Args: path (str): The path to an object to check. @@ -120,35 +130,35 @@ def exists(self, path: str) -> bool: bool: Whether the object exists. """ if not path.startswith("s3://"): - raise ValueError("Path must be an AWS S3 URL ('s3://my/path').") + msg = "Path must be an AWS S3 URL ('s3://my/path')." + raise ValueError(msg) # Note this only checks for files. file_exists = wr.s3.does_object_exist(boto3_session=self.session, path=path) if file_exists: return True + + # Use another method in case the path is a folder. + client = self.session.client("s3") + bucket = path.split("/")[2] + path = str(Path(*path.rstrip("/").split("/")[3:])) + + response = client.list_objects_v2(Bucket=bucket, Prefix=path, Delimiter="/") + + folders_with_prefix: list[dict] = response.get("CommonPrefixes") + if folders_with_prefix is None: + folder_exists = False else: - # Use another method in case the path is a folder. - client = self.session.client("s3") - bucket = path.split("/")[2] - path = os.path.join(*path.rstrip("/").split("/")[3:]) - - response = client.list_objects_v2(Bucket=bucket, Prefix=path, Delimiter="/") - - folders_with_prefix: list[dict] = response.get("CommonPrefixes") - if folders_with_prefix is None: - folder_exists = False - else: - # This is because list_objects takes in `Prefix`, so eg. if there exists - # a path `a/b/abc` and we run `list_objects_v2(path=`a/b/a`)`, - # it would enlist `a/b/abc` as well. 
- paths = [path["Prefix"].rstrip("/") for path in folders_with_prefix] - folder_exists = path in paths - return folder_exists + # This is because list_objects takes in `Prefix`, so eg. if there exists + # a path `a/b/abc` and we run `list_objects_v2(path=`a/b/a`)`, + # it would enlist `a/b/abc` as well. + paths = [path["Prefix"].rstrip("/") for path in folders_with_prefix] + folder_exists = path in paths + return folder_exists def cp(self, from_path: str, to_path: str, recursive: bool = False) -> None: - """ - Copies the contents of `from_path` to `to_path`. + """Copies the contents of `from_path` to `to_path`. Args: from_path (str): The path (S3 URL) of the source directory. @@ -171,9 +181,8 @@ def cp(self, from_path: str, to_path: str, recursive: bool = False) -> None: """ self.fs.copy(path1=from_path, path2=to_path, recursive=recursive) - def rm(self, path: Union[str, list[str]]) -> None: - """ - Delete files under `path`. + def rm(self, path: str | list[str]) -> None: + """Delete files under `path`. Args: path (list[str]): Path to a list of files or a directory @@ -188,7 +197,6 @@ def rm(self, path: Union[str, list[str]]) -> None: s3.rm(path=["file1.parquet"]) ``` """ - wr.s3.delete_objects(boto3_session=self.session, path=path) def from_df( @@ -198,18 +206,19 @@ def from_df( extension: Literal[".csv", ".parquet"] = ".parquet", **kwargs, ) -> None: - """ - Upload a pandas `DataFrame` into Amazon S3 as a CSV or Parquet file. - For full list of available parameters please refer to the official documentation: + """Upload a pandas `DataFrame` into Amazon S3 as a CSV or Parquet file. + + For a full list of available parameters, please refer to the official + documentation: https://aws-sdk-pandas.readthedocs.io/en/3.0.0/stubs/awswrangler.s3.to_parquet.html https://aws-sdk-pandas.readthedocs.io/en/3.0.0/stubs/awswrangler.s3.to_csv.html Args: df (pd.DataFrame): The pandas DataFrame to upload. path (str): The destination path. - extension (Literal[".csv", ".parquet"], optional): The file extension. Defaults to ".parquet". + extension (Literal[".csv", ".parquet"], optional): The file extension. + Defaults to ".parquet". """ - if extension == ".parquet": wr.s3.to_parquet( boto3_session=self.session, @@ -225,20 +234,20 @@ def from_df( **kwargs, ) else: - raise ValueError("Only parquet and CSV formats are supported.") + msg = "Only CSV and Parquet formats are supported." + raise ValueError(msg) def to_df( self, paths: list[str], - chunk_size: int = None, + chunk_size: int | None = None, **kwargs, - ) -> Union[pd.DataFrame, Iterable[pd.DataFrame]]: - """ - Reads a CSV or Parquet file into a pandas `DataFrame`. + ) -> pd.DataFrame | Iterable[pd.DataFrame]: + """Read a CSV or Parquet file into a pandas `DataFrame`. Args: - paths (list[str]): A list of paths to Amazon S3 files. All files under the path - must be of the same type. + paths (list[str]): A list of paths to Amazon S3 files. All files under the + path must be of the same type. chunk_size (int, optional): Number of rows to include in each chunk. Defaults to None, ie. return all data as a single `DataFrame`. @@ -267,7 +276,6 @@ def to_df( print(df) ``` """ - if chunk_size is None: # `chunked` expects either an integer or a boolean. chunk_size = False @@ -281,27 +289,24 @@ def to_df( boto3_session=self.session, path=paths, chunked=chunk_size, **kwargs ) else: - raise ValueError("Only CSV and parquet formats are supported.") + msg = "Only CSV and Parquet formats are supported." 
+ raise ValueError(msg) return df def upload(self, from_path: str, to_path: str) -> None: - """ - Upload file(s) to S3. + """Upload file(s) to S3. Args: from_path (str): Path to local file(s) to be uploaded. to_path (str): Path to the destination file/folder. """ - wr.s3.upload(boto3_session=self.session, local_file=from_path, path=to_path) def download(self, from_path: str, to_path: str) -> None: - """ - Download file(s) from Amazon S3. + """Download file(s) from Amazon S3. Args: from_path (str): Path to file in Amazon S3. to_path (str): Path to local file(s) to be stored. """ - wr.s3.download(boto3_session=self.session, path=from_path, local_file=to_path) diff --git a/src/viadot/sources/sap_rfc.py b/src/viadot/sources/sap_rfc.py index 8e034f543..d756e677a 100755 --- a/src/viadot/sources/sap_rfc.py +++ b/src/viadot/sources/sap_rfc.py @@ -1,43 +1,45 @@ +"""SAP RFC connectors.""" + +from collections import OrderedDict +from collections import OrderedDict as OrderedDictType +from collections.abc import Iterable, Iterator import logging import re -from collections import OrderedDict from typing import ( Any, - Dict, - Iterable, - Iterator, - List, Literal, - Tuple, - Union, -) -from typing import ( - OrderedDict as OrderedDictType, ) import numpy as np +from numpy.typing import ArrayLike import pandas as pd +import pyrfc + try: import pyrfc from pyrfc._exception import ABAPApplicationError -except ModuleNotFoundError: - raise ImportError("pyfrc is required to use the SAPRFC source.") +except ModuleNotFoundError as e: + msg = "Missing required modules to use SAPRFC source." + raise ImportError(msg) from e + from sql_metadata import Parser from viadot.config import get_source_credentials -from viadot.exceptions import CredentialError, DataBufferExceeded +from viadot.exceptions import CredentialError, DataBufferExceededError from viadot.sources.base import Source from viadot.utils import add_viadot_metadata_columns, validate + logger = logging.getLogger() def adjust_where_condition_by_adding_missing_spaces(sql: str) -> str: - """Function for adding white spaces between operators and `WHERE` statement. - This function is taking raw sql string and sanitizing it at the beginning of the - 'query()' method, so other methods that taking sql as parameter could have sql - without whitespaces issues. + """Add white spaces between operators and `WHERE` statement. + + This function is taking raw sql string and sanitizing it at the beginning of the + 'query()' method, so other methods that taking sql as parameter could have sql + without whitespaces issues. Args: sql (str): raw sql query passed in flow @@ -45,13 +47,14 @@ def adjust_where_condition_by_adding_missing_spaces(sql: str) -> str: Returns: str: sql query after adding white spaces if needed """ - - # Check if 'WHERE' statement is not attached to 'FROM' or column name as there is need for space " " on both side of 'WHERE' + # Check if 'WHERE' statement is not attached to 'FROM' or column name as there is + # a need for space on both sides of 'WHERE'. 
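A simplified stand-in for the sanitizer above, not its exact regexes, showing the intended effect: comparison operators glued to their operands get padded with single spaces so the later WHERE-clause parsing does not trip over them:

```python
import re


def naive_add_operator_spaces(sql: str) -> str:
    # Pad comparison operators with spaces, then collapse any double spacing.
    sql = re.sub(r"\s*(<>|!=|<=|>=|=|<|>)\s*", r" \1 ", sql)
    return re.sub(r"\s+", " ", sql).strip()


print(naive_add_operator_spaces("SELECT a FROM tab WHERE a<=1 AND b='x'"))
# -> SELECT a FROM tab WHERE a <= 1 AND b = 'x'
```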
sql = re.sub(rf'{re.escape("WHERE")}(?", "!=", "<=", ">=", "!<", "!>", "=", ">", "<"] reverse_check = [ "< >", @@ -73,28 +76,24 @@ def adjust_where_condition_by_adding_missing_spaces(sql: str) -> str: return sql -def remove_whitespaces(text): +def _remove_whitespaces(text: str) -> str: return " ".join(text.split()) -def get_keyword_for_condition(where: str, condition: str) -> str: +def _get_keyword_for_condition(where: str, condition: str) -> str: where = where[: where.find(condition)] return where.split()[-1] -def get_where_uppercased(where: str) -> str: - """ - Uppercase a WHERE clause's keywords without - altering the original string. - """ +def _get_where_uppercased(where: str) -> str: + """Uppercase a WHERE clause's keywords without altering the original string.""" where_and_uppercased = re.sub("\\sand ", " AND ", where) - where_and_and_or_uppercased = re.sub("\\sor ", " OR ", where_and_uppercased) - return where_and_and_or_uppercased + return re.sub("\\sor ", " OR ", where_and_uppercased) -def remove_last_condition(where: str) -> str: +def _remove_last_condition(where: str) -> str: """Remove the last condtion from a WHERE clause.""" - where = get_where_uppercased(where) + where = _get_where_uppercased(where) split_by_and = re.split("\\sAND ", where) conditions = [re.split("\\sOR ", expr) for expr in split_by_and] conditions_flattened = [ @@ -109,25 +108,23 @@ def remove_last_condition(where: str) -> str: return where_trimmed_without_last_keyword, condition_to_remove -def trim_where(where: str) -> Tuple[str, OrderedDictType[str, str]]: - """ - Trim a WHERE clause to 75 characters or less, - as required by SAP. The rest of filters will be applied - in-memory on client side. - """ +def _trim_where(where: str) -> tuple[str, OrderedDictType[str, str] | None]: + """Trim a WHERE clause to 75 characters or less, as required by SAP RFC. - if len(where) <= 75: + The rest of filters will be applied in-memory on client side. + """ + if len(where) <= SAPRFC.COL_CHARACTER_WIDTH_LIMIT: return where, None wheres_to_add = OrderedDict() keywords_with_conditions = [] where_trimmed = where - while len(where_trimmed) > 75: + while len(where_trimmed) > SAPRFC.COL_CHARACTER_WIDTH_LIMIT: # trim the where - where_trimmed, removed_condition = remove_last_condition(where_trimmed) + where_trimmed, removed_condition = _remove_last_condition(where_trimmed) # store the removed conditions so we can readd them later - keyword = get_keyword_for_condition(where, removed_condition) + keyword = _get_keyword_for_condition(where, removed_condition) keywords_with_conditions.append((keyword, removed_condition)) wheres_to_add_sorted = keywords_with_conditions[::-1] @@ -136,22 +133,28 @@ def trim_where(where: str) -> Tuple[str, OrderedDictType[str, str]]: return where_trimmed, wheres_to_add -def detect_extra_rows( - row_index: int, data_raw: np.array, chunk: int, fields: List[str] -) -> Union[int, np.array, bool]: - """Check if, in between calls to the SAP table, the number of rows have increased. - If so, remove the last rows added, to fit the size of the previous columns. +def _detect_extra_rows( + row_index: int, + data_raw: ArrayLike, + chunk: int, + fields: list[str], +) -> int | ArrayLike | bool: + """Check if, in between calls to the SAP table, the number of rows has increased. + + If so, remove the last rows added, to fit the size of the previous columns. Args: - row_index (int): Number of rows set it down in he first iteration with the SAP table. - data_raw (np.array): Array with the data retrieve from SAP table. 
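Since `_trim_where()` and its helpers are central to how the connector copes with the 75-character `WHERE` limit, here is a simplified, standalone sketch of the idea under the same assumptions the source documents (uppercased `AND`/`OR`, no nested parentheses). It is not the viadot code itself.

```python
import re

SAP_WHERE_LIMIT = 75


def split_conditions(where: str) -> list[tuple[str, str]]:
    """Split an uppercased WHERE clause into (keyword, condition) pairs."""
    tokens = re.split(r"\s+(AND|OR)\s+", where)
    pairs = [("", tokens[0])]
    pairs += list(zip(tokens[1::2], tokens[2::2]))
    return pairs


def trim_where_sketch(where: str) -> tuple[str, list[tuple[str, str]]]:
    """Keep leading conditions while the clause still fits into 75 characters;
    everything after the cut is returned so it can be applied client-side."""
    pairs = split_conditions(where)
    trimmed = pairs[0][1]
    remainder: list[tuple[str, str]] = []
    for keyword, condition in pairs[1:]:
        candidate = f"{trimmed} {keyword} {condition}"
        if remainder or len(candidate) > SAP_WHERE_LIMIT:
            remainder.append((keyword, condition))
        else:
            trimmed = candidate
    return trimmed, remainder


where = (
    "COL_1 = 'A' AND COL_2 = 'B' AND COL_3 = 'C' AND COL_4 = 'D' "
    "AND COL_5 = 'E' AND COL_6 = 'F'"
)
trimmed, client_side = trim_where_sketch(where)
print(len(trimmed), trimmed)  # 75 characters sent to SAP
print(client_side)            # [('AND', "COL_6 = 'F'")] applied in pandas later
```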
- chunk (int): The part number in which a number of SAP table columns have been split. + row_index (int): Number of rows set it down in he first iteration with the SAP + table. + data_raw (ArrayLike): Array with the data retrieve from SAP table. + chunk (int): The part number in which a number of SAP table columns have been + split. fields (List[str]): A list with the names of the columns in a chunk. Returns: - Union[int, np.array, bool]: A tuple with the parameters "row_index", "data_raw", a new - boolean variable "start" to indicate when the for loop has to be restarted, - and "chunk" variable. + Union[int, ArrayLike, bool]: A tuple with the parameters "row_index", + "data_raw", a new boolean variable "start" to indicate when the for loop has + to be restarted, and "chunk" variable. """ start = False if row_index == 0: @@ -170,107 +173,11 @@ def detect_extra_rows( return row_index, data_raw, start -def replace_separator_in_data( - data_raw: np.array, - no_sep_index: np.array, - record_key: str, - pos_sep_index: np.array, - sep: str, - replacement: str, -) -> np.array: - """Function to replace the extra separator in every row of the data_raw numpy array. - - Args: - data_raw (np.array): Array with the data retrieve from SAP table. - no_sep_index (np.array): Array with indexes where are extra separators characters in rows. - record_key (str): Key word to extract the data from the numpy array "data_raw". - pos_sep_index (np.array): Array with indexes where are placed real separators. - sep (str): Which separator to use when querying SAP. - replacement (str): In case of sep is on a columns, set up a new character to replace - inside the string to avoid flow breakdowns. +def _gen_split(data: Iterable[str], sep: str, record_key: str) -> Iterator[list[str]]: + """Split each string in the given iterable using the specified separator. - Returns: - np.array: the same data_raw numpy array with the "replacement" separator instead. - """ - for no_sep in no_sep_index: - logger.warning( - "A separator character was found and replaced inside a string text that could produce future errors:" - ) - logger.warning("\n" + data_raw[no_sep][record_key]) - split_array = np.array([*data_raw[no_sep][record_key]]) - position = np.where(split_array == f"{sep}")[0] - index_sep_index = np.argwhere(np.in1d(position, pos_sep_index) == False) # noqa - index_sep_index = index_sep_index.reshape( - len(index_sep_index), - ) - split_array[position[index_sep_index]] = replacement - data_raw[no_sep][record_key] = "".join(split_array) - logger.warning("\n" + data_raw[no_sep][record_key]) - - return data_raw - - -def catch_extra_separators( - data_raw: np.array, record_key: str, sep: str, fields: List[str], replacement: str -) -> np.array: - """Function to replace extra separators in every row of the table. - - Args: - data_raw (np.array): Array with the data retrieve from SAP table. - record_key (str): Key word to extract the data from the numpy array "data_raw". - sep (str): Which separator to use when querying SAP. - fields (List[str]): A list with the names of the columns in a chunk. - replacement (str): In case of sep is on a columns, set up a new character to replace - inside the string to avoid flow breakdowns. - - Returns: - np.array: The argument "data_raw" with no extra delimiters. 
- """ - - # remove scape characters from data_raw ("\t") - for n, r in enumerate(data_raw): - if "\t" in r[record_key]: - data_raw[n][record_key] = r[record_key].replace("\t", " ") - - # first it is identified where the data has an extra separator in text columns. - sep_counts = np.array([], dtype=int) - for row in data_raw: - sep_counts = np.append(sep_counts, row[record_key].count(f"{sep}")) - - no_sep_index = np.argwhere(sep_counts != len(fields) - 1) - no_sep_index = no_sep_index.reshape( - len(no_sep_index), - ) - sep_index = np.argwhere(sep_counts == len(fields) - 1) - sep_index = sep_index.reshape( - len(sep_index), - ) - # identifying "good" rows we obtain the index of separator positions. - pos_sep_index = np.array([], dtype=int) - for data in data_raw[sep_index]: - pos_sep_index = np.append( - pos_sep_index, - np.where(np.array([*data[record_key]]) == f"{sep}"), - ) - pos_sep_index = np.unique(pos_sep_index) - - # in rows with an extra separator, we replace them by another character: "-" by default - data_raw = replace_separator_in_data( - data_raw, - no_sep_index, - record_key, - pos_sep_index, - sep, - replacement, - ) - - return data_raw - - -def gen_split(data: Iterable[str], sep: str, record_key: str) -> Iterator[List[str]]: - """ - Splits each string in the given iterable using the specified separator and yields the resulting list. - Helps to reduce memory usage when processing big data sets. + Helps to reduce memory usage when processing big data sets by yielding the resulting + list. Args: data: An iterable collection of strings to be split. @@ -289,8 +196,7 @@ def gen_split(data: Iterable[str], sep: str, record_key: str) -> Iterator[List[s class SAPRFC(Source): - """ - A class for querying SAP with SQL using the RFC protocol. + """A class for querying SAP with SQL using the RFC protocol. Note that only a very limited subset of SQL is supported: - aliases @@ -304,38 +210,42 @@ class SAPRFC(Source): - etc. """ + COL_CHARACTER_WIDTH_LIMIT = 75 + def __init__( self, - sep: str = None, + sep: str | None = None, func: str = "RFC_READ_TABLE", rfc_total_col_width_character_limit: int = 400, - credentials: Dict[str, Any] = None, - config_key: str = None, + credentials: dict[str, Any] | None = None, + config_key: str | None = None, *args, **kwargs, ): """Create an instance of the SAPRFC class. Args: - sep (str, optional): Which separator to use when querying SAP. If not provided, - multiple options are automatically tried. + sep (str, optional): Which separator to use when querying SAP. If not + provided, multiple options are automatically tried. func (str, optional): SAP RFC function to use. Defaults to "RFC_READ_TABLE". - rfc_total_col_width_character_limit (int, optional): Number of characters by which query will be split in chunks - in case of too many columns for RFC function. According to SAP documentation, the limit is - 512 characters. However, we observed SAP raising an exception even on a slightly lower number - of characters, so we add a safety margin. Defaults to 400. + rfc_total_col_width_character_limit (int, optional): Number of characters by + which query will be split in chunks in case of too many columns for RFC + function. According to SAP documentation, the limit is 512 characters. + However, we observed SAP raising an exception even on a slightly lower + number of characters, so we add a safety margin. Defaults to 400. credentials (Dict[str, Any], optional): 'api_key'. Defaults to None. 
- config_key (str, optional): The key in the viadot config holding relevant credentials. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Raises: CredentialError: If provided credentials are incorrect. """ - self._con = None credentials = credentials or get_source_credentials(config_key) if credentials is None: - raise CredentialError("Please specify the credentials.") + msg = "Please specify the credentials." + raise CredentialError(msg) super().__init__(*args, credentials=credentials, **kwargs) @@ -346,6 +256,7 @@ def __init__( @property def con(self) -> pyrfc.Connection: + """The pyRFC connection to SAP.""" if self._con is not None: return self._con con = pyrfc.Connection(**self.credentials) @@ -353,27 +264,28 @@ def con(self) -> pyrfc.Connection: return con def check_connection(self) -> None: + """Check the connection to SAP.""" self.logger.info("Checking the connection...") self.con.ping() self.logger.info("Connection has been validated successfully.") def close_connection(self) -> None: - """Closing RFC connection.""" + """Close the RFC connection.""" self.con.close() self.logger.info("Connection has been closed successfully.") def get_function_parameters( self, function_name: str, - description: Union[None, Literal["short", "long"]] = "short", + description: None | Literal["short", "long"] = "short", *args, - ) -> Union[List[str], pd.DataFrame]: + ) -> list[str] | pd.DataFrame: """Get the description for a SAP RFC function. Args: function_name (str): The name of the function to detail. - description (Union[None, Literal[, optional): Whether to display - a short or a long description. Defaults to "short". + description (Union[None, Literal[, optional): Whether to display a short or + a long description. Defaults to "short". Raises: ValueError: If the argument for description is incorrect. @@ -383,11 +295,9 @@ def get_function_parameters( parameter names (if 'description' is set to None), or a short or long description. """ - if description is not None: - if description not in ["short", "long"]: - raise ValueError( - "Incorrect value for 'description'. Correct values: (None, 'short', 'long'" - ) + if description not in ["short", "long"]: + msg = "Incorrect value for 'description'. Correct values: None, 'short', 'long'." + raise ValueError(msg) descr = self.con.get_function_description(function_name, *args) param_names = [param["name"] for param in descr.parameters] @@ -414,7 +324,7 @@ def get_function_parameters( return params - def _get_where_condition(self, sql: str) -> str: + def _get_where_condition(self, sql: str) -> str | None: """Retrieve the WHERE conditions from a SQL query. Args: @@ -428,7 +338,6 @@ def _get_where_condition(self, sql: str) -> str: Returns: str: The where clause trimmed to <= 75 characters. """ - where_match = re.search("\\sWHERE ", sql.upper()) if not where_match: return None @@ -437,29 +346,26 @@ def _get_where_condition(self, sql: str) -> str: limit_pos = limit_match.span()[0] if limit_match else len(sql) where = sql[where_match.span()[1] : limit_pos] - where_sanitized = remove_whitespaces(where) - where_trimmed, client_side_filters = trim_where(where_sanitized) + where_sanitized = _remove_whitespaces(where) + where_trimmed, client_side_filters = _trim_where(where_sanitized) if client_side_filters: self.logger.warning( "A WHERE clause longer than 75 character limit detected." 
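A minimal usage sketch of the class as documented above; the import path, config key, and the SAP table/columns (`MAKT`, `MATNR`, `MAKTX`, `SPRAS`) are placeholders, and the optional `pyrfc` dependency plus valid RFC credentials are assumed.

```python
from viadot.sources import SAPRFC  # assumed import path; requires pyrfc

rfc = SAPRFC(config_key="sap")  # hypothetical viadot config key with RFC credentials

# Only the documented SQL subset is supported (SELECT, WHERE, LIMIT, aliases, ...).
rfc.query("SELECT MATNR, MAKTX FROM MAKT WHERE SPRAS = 'E' LIMIT 5")
df = rfc.to_df()
rfc.close_connection()
print(df.head())
```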
) - if "OR" in [key.upper() for key in client_side_filters.keys()]: - raise ValueError( - "WHERE conditions after the 75 character limit can only be combined with the AND keyword." - ) + if "OR" in [key.upper() for key in client_side_filters]: + msg = "WHERE conditions after the 75 character limit can only be combined with the AND keyword." + raise ValueError(msg) for val in client_side_filters.values(): if ")" in val: - raise ValueError( - """Nested conditions eg. AND (col_1 = 'a' AND col_2 = 'b') found between or after 75 chararacters in WHERE condition! - Please change nested conditions part of query separeted with 'AND' keywords, or place nested conditions part at the begining of the where statement. - """ - ) - else: - filters_pretty = list(client_side_filters.items()) - self.logger.warning( - f"Trimmed conditions ({filters_pretty}) will be applied client-side." - ) - self.logger.warning("See the documentation for caveats.") + msg = "Nested conditions eg. AND (col_1 = 'a' AND col_2 = 'b') found between or after 75 characters in WHERE condition!" + msg += " Please change nested conditions part of query separated with 'AND' keywords," + msg += " or place nested conditions part at the beginning of the where statement." + raise ValueError(msg) + filters_pretty = list(client_side_filters.items()) + self.logger.warning( + f"Trimmed conditions ({filters_pretty}) will be applied client-side." + ) + self.logger.warning("See the documentation for caveats.") self.client_side_filters = client_side_filters return where_trimmed @@ -468,13 +374,15 @@ def _get_where_condition(self, sql: str) -> str: def _get_table_name(sql: str) -> str: parsed = Parser(sql) if len(parsed.tables) > 1: - raise ValueError("Querying more than one table is not supported.") + msg = "Querying more than one table is not supported." + raise ValueError(msg) return parsed.tables[0] def _build_pandas_filter_query( self, client_side_filters: OrderedDictType[str, str] ) -> str: """Build a WHERE clause that will be applied client-side. + This is required if the WHERE clause passed to query() is longer than 75 characters. @@ -488,20 +396,19 @@ def _build_pandas_filter_query( """ for i, f in enumerate(client_side_filters.items()): if i == 0: - # skip the first keyword; we assume it's "AND" + # Skip the first keyword; we assume it's "AND". query = f[1] else: query += " " + f[0] + " " + f[1] filter_column_name = f[1].split()[0] resolved_column_name = self._resolve_col_name(filter_column_name) - query = re.sub("\\s?=\\s?", " == ", query).replace( + return re.sub("\\s?=\\s?", " == ", query).replace( filter_column_name, resolved_column_name ) - return query def extract_values(self, sql: str) -> None: - """TODO: This should cover all values, not just columns""" + """TODO: This should cover all values, not just columns.""" self.where = self._get_where_condition(sql) self.select_columns = self._get_columns(sql, aliased=False) self.select_columns_aliased = self._get_columns(sql, aliased=True) @@ -510,7 +417,7 @@ def _resolve_col_name(self, column: str) -> str: """Get aliased column name if it exists, otherwise return column name.""" return self.aliases_keyed_by_columns.get(column, column) - def _get_columns(self, sql: str, aliased: bool = False) -> List[str]: + def _get_columns(self, sql: str, aliased: bool = False) -> list[str]: """Retrieve column names from a SQL query. 
Args: @@ -531,19 +438,13 @@ def _get_columns(self, sql: str, aliased: bool = False) -> List[str]: self.aliases_keyed_by_columns = aliases_keyed_by_columns - columns = [ - ( - aliases_keyed_by_columns[col] - if col in aliases_keyed_by_columns - else col - ) - for col in columns - ] + columns = [aliases_keyed_by_columns.get(col, col) for col in columns] if self.client_side_filters: - # In case the WHERE clause is > 75 characters long, we execute the rest of the filters - # client-side. To do this, we need to pull all fields in the client-side WHERE conditions. - # Below code adds these columns to the list of SELECTed fields. + # In case the WHERE clause is > 75 characters long, we execute the rest of + # the filters client-side. To do this, we need to pull all fields in the + # client-side WHERE conditions. Below code adds these columns to the list of + # SELECTed fields. cols_to_add = [v.split()[0] for v in self.client_side_filters.values()] if aliased: cols_to_add = [aliases_keyed_by_columns[col] for col in cols_to_add] @@ -554,7 +455,7 @@ def _get_columns(self, sql: str, aliased: bool = False) -> List[str]: @staticmethod def _get_limit(sql: str) -> int: - """Get limit from the query""" + """Get limit from the query.""" limit_match = re.search("\\sLIMIT ", sql.upper()) if not limit_match: return None @@ -563,28 +464,27 @@ def _get_limit(sql: str) -> int: @staticmethod def _get_offset(sql: str) -> int: - """Get offset from the query""" + """Get offset from the query.""" offset_match = re.search("\\sOFFSET ", sql.upper()) if not offset_match: return None return int(sql[offset_match.span()[1] :].split()[0]) - def query(self, sql: str, sep: str = None) -> None: - """Parse an SQL query into pyRFC commands and save it into - an internal dictionary. + def query(self, sql: str, sep: str | None = None) -> None: + """Parse an SQL query into pyRFC commands and save it into an internal dict. Args: sql (str): The SQL query to be ran. - sep (str, optional): The separator to be used - to split columns in the result blob. Defaults to self.sep. + sep (str, optional): The separator to be used to split columns in the result + blob. Defaults to self.sep. Raises: ValueError: If the query is not a SELECT query. """ - if not sql.strip().upper().startswith("SELECT"): - raise ValueError("Only SELECT queries are supported.") + msg = "Only SELECT queries are supported." + raise ValueError(msg) sep = sep if sep is not None else self.sep @@ -594,11 +494,12 @@ def query(self, sql: str, sep: str = None) -> None: self.extract_values(sql) table_name = self._get_table_name(sql) - # this has to be called before checking client_side_filters + # This has to be called before checking client_side_filters. where = self.where columns = self.select_columns character_limit = self.rfc_total_col_width_character_limit - # due to the RFC_READ_TABLE limit of characters per row, colums are splited into smaller lists + # Due to the RFC_READ_TABLE limit of characters per row, columns are split into + # smaller lists. 
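The column-chunking step described in the comment above can be sketched as a greedy packer; the SAP column names and per-column character widths below are made up, since where the real widths come from is not shown in this hunk.

```python
def chunk_columns_sketch(
    columns: list[str], widths: dict[str, int], char_limit: int = 400
) -> list[list[str]]:
    """Greedily pack columns into chunks whose combined character width stays
    under the per-row limit; each chunk becomes a separate RFC_READ_TABLE call."""
    chunks: list[list[str]] = []
    current: list[str] = []
    total = 0
    for col in columns:
        if current and total + widths[col] > char_limit:
            chunks.append(current)
            current, total = [], 0
        current.append(col)
        total += widths[col]
    if current:
        chunks.append(current)
    return chunks


# Example SAP columns with made-up character widths.
print(
    chunk_columns_sketch(
        ["MATNR", "MAKTX", "WERKS"],
        widths={"MATNR": 18, "MAKTX": 40, "WERKS": 4},
        char_limit=60,
    )
)
# -> [['MATNR', 'MAKTX'], ['WERKS']]
```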
lists_of_columns = [] cols = [] col_length_total = 0 @@ -618,22 +519,22 @@ def query(self, sql: str, sep: str = None) -> None: options = [{"TEXT": where}] if where else None limit = self._get_limit(sql) offset = self._get_offset(sql) - query_json = dict( - QUERY_TABLE=table_name, - FIELDS=columns, - OPTIONS=options, - ROWCOUNT=limit, - ROWSKIPS=offset, - DELIMITER=sep, - ) + query_json = { + "QUERY_TABLE": table_name, + "FIELDS": columns, + "OPTIONS": options, + "ROWCOUNT": limit, + "ROWSKIPS": offset, + "DELIMITER": sep, + } # SAP doesn't understand None, so we filter out non-specified parameters query_json_filtered = { key: query_json[key] for key in query_json if query_json[key] is not None } self._query = query_json_filtered - def call(self, func: str, *args, **kwargs): - """Call a SAP RFC function""" + def call(self, func: str, *args, **kwargs) -> dict[str, Any]: + """Call a SAP RFC function.""" return self.con.call(func, *args, **kwargs) def _get_alias(self, column: str) -> str: @@ -642,10 +543,10 @@ def _get_alias(self, column: str) -> str: def _get_client_side_filter_cols(self): return [f[1].split()[0] for f in self.client_side_filters.items()] + # TODO: refactor to remove linter warnings and so this can be tested. @add_viadot_metadata_columns - def to_df(self, tests: dict = None): - """ - Load the results of a query into a pandas DataFrame. + def to_df(self, tests: dict | None = None) -> pd.DataFrame: # noqa: C901, PLR0912, RUF100 + """Load the results of a query into a pandas DataFrame. Due to SAP limitations, if the length of the WHERE clause is longer than 75 characters, we trim whe WHERE clause and perform the rest of the filtering @@ -664,7 +565,8 @@ def to_df(self, tests: dict = None): function from utils. Defaults to None. Returns: - pd.DataFrame: A DataFrame representing the result of the query provided in `PyRFC.query()`. + pd.DataFrame: A DataFrame representing the result of the query provided in + `PyRFC.query()`. """ params = self._query columns = self.select_columns_aliased @@ -674,8 +576,8 @@ def to_df(self, tests: dict = None): logger.info(f"Data will be downloaded in {len(fields_lists)} chunks.") func = self.func if sep is None: - # automatically find a working separator - SEPARATORS = [ + # Automatically find a working separator. + separators = [ "|", "/t", "#", @@ -690,10 +592,10 @@ def to_df(self, tests: dict = None): "$", ] else: - SEPARATORS = [sep] + separators = [sep] records = None - for sep in SEPARATORS: + for sep in separators: logger.info(f"Checking if separator '{sep}' works.") df = pd.DataFrame() self._query["DELIMITER"] = sep @@ -706,11 +608,9 @@ def to_df(self, tests: dict = None): response = self.call(func, **params) except ABAPApplicationError as e: if e.key == "DATA_BUFFER_EXCEEDED": - raise DataBufferExceeded( - "Character limit per row exceeded. Please select fewer columns." - ) - else: - raise e + msg = "Character limit per row exceeded. Please select fewer columns." + raise DataBufferExceededError(msg) from e + raise record_key = "WA" data_raw = response["DATA"] records = [row[record_key].split(sep) for row in data_raw] @@ -745,8 +645,10 @@ def to_df(self, tests: dict = None): class SAPRFCV2(Source): - """ - A class for querying SAP with SQL using the RFC protocol. + """A class for querying SAP with SQL using the RFC protocol. + + This is mostly a copy of SAPRFC, with some unidentified modifications that should + have probably been added as features to the SAPRFC source. 
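The separator loop in `to_df()` is easier to follow with a toy check like the one below: the candidate list is mirrored from the source, while the validation shown here (every row must split into the expected number of fields) is a simplified stand-in for what the method actually does after each RFC call.

```python
# Candidate separators mirrored from the source.
CANDIDATE_SEPARATORS = ["|", "/t", "#", ";", "@", "%", "^", "`", "~", "{", "}", "$"]


def separator_works_sketch(rows: list[str], sep: str, n_fields: int) -> bool:
    """If a cell value itself contains the separator, the split yields the wrong
    number of fields and the next candidate separator should be tried."""
    return all(len(row.split(sep)) == n_fields for row in rows)


rows = ["1000|Widget|42", "1001|Gad|get|7"]  # the second description contains '|'
print(separator_works_sketch(rows, sep="|", n_fields=3))  # False -> try the next candidate
```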
Note that only a very limited subset of SQL is supported: - aliases @@ -762,41 +664,45 @@ class SAPRFCV2(Source): def __init__( self, - sep: str = None, + sep: str | None = None, replacement: str = "-", func: str = "RFC_READ_TABLE", rfc_total_col_width_character_limit: int = 400, - rfc_unique_id: List[str] = None, - credentials: Dict[str, Any] = None, - config_key: str = None, + rfc_unique_id: list[str] | None = None, + credentials: dict[str, Any] | None = None, + config_key: str | None = None, *args, **kwargs, ): """Create an instance of the SAPRFC class. Args: - sep (str, optional): Which separator to use when querying SAP. If not provided, - multiple options are automatically tried. - replacement (str, optional): In case of separator is on a columns, set up a new character to replace - inside the string to avoid flow breakdowns. Defaults to "-". + sep (str, optional): Which separator to use when querying SAP. If not + provided, multiple options are automatically tried. + replacement (str, optional): In case of separator is on a columns, set up a + new character to replace inside the string to avoid flow breakdowns. + Defaults to "-". func (str, optional): SAP RFC function to use. Defaults to "RFC_READ_TABLE". - rfc_total_col_width_character_limit (int, optional): Number of characters by which query will be split in chunks - in case of too many columns for RFC function. According to SAP documentation, the limit is - 512 characters. However, we observed SAP raising an exception even on a slightly lower number - of characters, so we add a safety margin. Defaults to 400. - rfc_unique_id (List[str], optional): Reference columns to merge chunks Data Frames. These columns must to be unique. Defaults to None. + rfc_total_col_width_character_limit (int, optional): Number of characters by + which query will be split in chunks in case of too many columns for RFC + function. According to SAP documentation, the limit is 512 characters. + However, we observed SAP raising an exception even on a slightly lower + number of characters, so we add a safety margin. Defaults to 400. + rfc_unique_id (List[str], optional): Reference columns to merge chunks + DataFrames. These columns must to be unique. Defaults to None. credentials (Dict[str, Any], optional): 'api_key'. Defaults to None. - config_key (str, optional): The key in the viadot config holding relevant credentials. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Raises: CredentialError: If provided credentials are incorrect. """ - self._con = None credentials = credentials or get_source_credentials(config_key) if credentials is None: - raise CredentialError("Please specify the credentials.") + msg = "Please specify the credentials." 
+ raise CredentialError(msg) super().__init__(*args, credentials=credentials, **kwargs) @@ -813,6 +719,7 @@ def __init__( @property def con(self) -> pyrfc.Connection: + """The pyRFC connection to SAP.""" if self._con is not None: return self._con con = pyrfc.Connection(**self.credentials) @@ -820,21 +727,22 @@ def con(self) -> pyrfc.Connection: return con def check_connection(self) -> None: + """Check the connection to SAP.""" self.logger.info("Checking the connection...") self.con.ping() self.logger.info("Connection has been validated successfully.") def close_connection(self) -> None: - """Closing RFC connection.""" + """Close the SAP RFC connection.""" self.con.close() self.logger.info("Connection has been closed successfully.") def get_function_parameters( self, function_name: str, - description: Union[None, Literal["short", "long"]] = "short", + description: None | Literal["short", "long"] = "short", *args, - ) -> Union[List[str], pd.DataFrame]: + ) -> list[str] | pd.DataFrame: """Get the description for a SAP RFC function. Args: @@ -850,11 +758,9 @@ def get_function_parameters( parameter names (if 'description' is set to None), or a short or long description. """ - if description is not None: - if description not in ["short", "long"]: - raise ValueError( - "Incorrect value for 'description'. Correct values: (None, 'short', 'long'" - ) + if description not in ["short", "long"]: + msg = "Incorrect value for 'description'. Correct values: None, 'short', 'long'." + raise ValueError(msg) descr = self.con.get_function_description(function_name, *args) param_names = [param["name"] for param in descr.parameters] @@ -895,7 +801,6 @@ def _get_where_condition(self, sql: str) -> str: Returns: str: The where clause trimmed to <= 75 characters. """ - where_match = re.search("\\sWHERE ", sql.upper()) if not where_match: return None @@ -904,29 +809,26 @@ def _get_where_condition(self, sql: str) -> str: limit_pos = limit_match.span()[0] if limit_match else len(sql) where = sql[where_match.span()[1] : limit_pos] - where_sanitized = remove_whitespaces(where) - where_trimmed, client_side_filters = trim_where(where_sanitized) + where_sanitized = _remove_whitespaces(where) + where_trimmed, client_side_filters = _trim_where(where_sanitized) if client_side_filters: self.logger.warning( "A WHERE clause longer than 75 character limit detected." ) - if "OR" in [key.upper() for key in client_side_filters.keys()]: - raise ValueError( - "WHERE conditions after the 75 character limit can only be combined with the AND keyword." - ) + if "OR" in [key.upper() for key in client_side_filters]: + msg = "WHERE conditions after the 75 character limit can only be combined with the AND keyword." + raise ValueError(msg) for val in client_side_filters.values(): if ")" in val: - raise ValueError( - """Nested conditions eg. AND (col_1 = 'a' AND col_2 = 'b') found between or after 75 chararacters in WHERE condition! - Please change nested conditions part of query separeted with 'AND' keywords, or place nested conditions part at the begining of the where statement. - """ - ) - else: - filters_pretty = list(client_side_filters.items()) - self.logger.warning( - f"Trimmed conditions ({filters_pretty}) will be applied client-side." - ) - self.logger.warning("See the documentation for caveats.") + msg = "Nested conditions eg. AND (col_1 = 'a' AND col_2 = 'b') found between or after 75 characters in WHERE condition!" 
+ msg += " Please change nested conditions part of query separated with 'AND' keywords," + msg += " or place nested conditions part at the beginning of the where statement." + raise ValueError(msg) + filters_pretty = list(client_side_filters.items()) + self.logger.warning( + f"Trimmed conditions ({filters_pretty}) will be applied client-side." + ) + self.logger.warning("See the documentation for caveats.") self.client_side_filters = client_side_filters return where_trimmed @@ -935,13 +837,15 @@ def _get_where_condition(self, sql: str) -> str: def _get_table_name(sql: str) -> str: parsed = Parser(sql) if len(parsed.tables) > 1: - raise ValueError("Querying more than one table is not supported.") + msg = "Querying more than one table is not supported." + raise ValueError(msg) return parsed.tables[0] def _build_pandas_filter_query( self, client_side_filters: OrderedDictType[str, str] ) -> str: """Build a WHERE clause that will be applied client-side. + This is required if the WHERE clause passed to query() is longer than 75 characters. @@ -962,13 +866,12 @@ def _build_pandas_filter_query( filter_column_name = f[1].split()[0] resolved_column_name = self._resolve_col_name(filter_column_name) - query = re.sub("\\s?=\\s?", " == ", query).replace( + return re.sub("\\s?=\\s?", " == ", query).replace( filter_column_name, resolved_column_name ) - return query def extract_values(self, sql: str) -> None: - """TODO: This should cover all values, not just columns""" + """TODO: This should cover all values, not just columns.""" self.where = self._get_where_condition(sql) self.select_columns = self._get_columns(sql, aliased=False) self.select_columns_aliased = self._get_columns(sql, aliased=True) @@ -977,7 +880,7 @@ def _resolve_col_name(self, column: str) -> str: """Get aliased column name if it exists, otherwise return column name.""" return self.aliases_keyed_by_columns.get(column, column) - def _get_columns(self, sql: str, aliased: bool = False) -> List[str]: + def _get_columns(self, sql: str, aliased: bool = False) -> list[str]: """Retrieve column names from a SQL query. Args: @@ -998,19 +901,13 @@ def _get_columns(self, sql: str, aliased: bool = False) -> List[str]: self.aliases_keyed_by_columns = aliases_keyed_by_columns - columns = [ - ( - aliases_keyed_by_columns[col] - if col in aliases_keyed_by_columns - else col - ) - for col in columns - ] + columns = [aliases_keyed_by_columns.get(col, col) for col in columns] if self.client_side_filters: - # In case the WHERE clause is > 75 characters long, we execute the rest of the filters - # client-side. To do this, we need to pull all fields in the client-side WHERE conditions. - # Below code adds these columns to the list of SELECTed fields. + # In case the WHERE clause is > 75 characters long, we execute the rest of + # the filters client-side. To do this, we need to pull all fields in the + # client-side WHERE conditions. Below code adds these columns to the list of + # SELECTed fields. 
cols_to_add = [v.split()[0] for v in self.client_side_filters.values()] if aliased: cols_to_add = [aliases_keyed_by_columns[col] for col in cols_to_add] @@ -1020,8 +917,8 @@ def _get_columns(self, sql: str, aliased: bool = False) -> List[str]: return columns @staticmethod - def _get_limit(sql: str) -> int: - """Get limit from the query""" + def _get_limit(sql: str) -> int | None: + """Get limit from the query.""" limit_match = re.search("\\sLIMIT ", sql.upper()) if not limit_match: return None @@ -1029,17 +926,17 @@ def _get_limit(sql: str) -> int: return int(sql[limit_match.span()[1] :].split()[0]) @staticmethod - def _get_offset(sql: str) -> int: - """Get offset from the query""" + def _get_offset(sql: str) -> int | None: + """Get offset from the query.""" offset_match = re.search("\\sOFFSET ", sql.upper()) if not offset_match: return None return int(sql[offset_match.span()[1] :].split()[0]) - def query(self, sql: str, sep: str = None) -> None: - """Parse an SQL query into pyRFC commands and save it into - an internal dictionary. + # Holy crap what a mess. TODO: refactor this so it can be even remotely tested... + def query(self, sql: str, sep: str | None = None) -> None: # noqa: C901, PLR0912 + """Parse an SQL query into pyRFC commands and save it into an internal dict. Args: sql (str): The SQL query to be ran. @@ -1049,9 +946,9 @@ def query(self, sql: str, sep: str = None) -> None: Raises: ValueError: If the query is not a SELECT query. """ - if not sql.strip().upper().startswith("SELECT"): - raise ValueError("Only SELECT queries are supported.") + msg = "Only SELECT queries are supported." + raise ValueError(msg) sep = sep if sep is not None else self.sep @@ -1080,9 +977,8 @@ def query(self, sql: str, sep: str = None) -> None: if col_length_reference_column > int( self.rfc_total_col_width_character_limit / 4 ): - raise ValueError( - f"{ref_column} can't be used as unique column, too large." - ) + msg = f"{ref_column} can't be used as unique column, too large." 
+ raise ValueError(msg) local_limit = ( self.rfc_total_col_width_character_limit - col_length_reference_column @@ -1100,7 +996,7 @@ def query(self, sql: str, sep: str = None) -> None: cols.append(col) else: if isinstance(self.rfc_unique_id[0], str) and all( - [rfc_col not in cols for rfc_col in self.rfc_unique_id] + rfc_col not in cols for rfc_col in self.rfc_unique_id ): for rfc_col in self.rfc_unique_id: if rfc_col not in cols: @@ -1108,35 +1004,35 @@ def query(self, sql: str, sep: str = None) -> None: lists_of_columns.append(cols) cols = [col] col_length_total = int(col_length) - else: - if isinstance(self.rfc_unique_id[0], str) and all( - [rfc_col not in cols for rfc_col in self.rfc_unique_id] - ): - for rfc_col in self.rfc_unique_id: - if rfc_col not in cols: - cols.append(rfc_col) - lists_of_columns.append(cols) + + if isinstance(self.rfc_unique_id[0], str) and all( + rfc_col not in cols for rfc_col in self.rfc_unique_id + ): + for rfc_col in self.rfc_unique_id: + if rfc_col not in cols: + cols.append(rfc_col) + lists_of_columns.append(cols) columns = lists_of_columns options = [{"TEXT": where}] if where else None limit = self._get_limit(sql) offset = self._get_offset(sql) - query_json = dict( - QUERY_TABLE=table_name, - FIELDS=columns, - OPTIONS=options, - ROWCOUNT=limit, - ROWSKIPS=offset, - DELIMITER=sep, - ) + query_json = { + "QUERY_TABLE": table_name, + "FIELDS": columns, + "OPTIONS": options, + "ROWCOUNT": limit, + "ROWSKIPS": offset, + "DELIMITER": sep, + } # SAP doesn't understand None, so we filter out non-specified parameters query_json_filtered = { key: query_json[key] for key in query_json if query_json[key] is not None } self._query = query_json_filtered - def call(self, func: str, *args, **kwargs): - """Call a SAP RFC function""" + def call(self, func: str, *args, **kwargs) -> dict[str, Any]: + """Call a SAP RFC function.""" return self.con.call(func, *args, **kwargs) def _get_alias(self, column: str) -> str: @@ -1145,10 +1041,10 @@ def _get_alias(self, column: str) -> str: def _get_client_side_filter_cols(self): return [f[1].split()[0] for f in self.client_side_filters.items()] + # TODO: refactor to remove linter warnings and so this can be tested. @add_viadot_metadata_columns - def to_df(self, tests: dict = None): - """ - Load the results of a query into a pandas DataFrame. + def to_df(self, tests: dict | None = None) -> pd.DataFrame: # noqa: C901, PLR0912, PLR0915 + """Load the results of a query into a pandas DataFrame. Due to SAP limitations, if the length of the WHERE clause is longer than 75 characters, we trim whe WHERE clause and perform the rest of the filtering @@ -1167,9 +1063,9 @@ def to_df(self, tests: dict = None): function from utils. Defaults to None. Returns: - pd.DataFrame: A DataFrame representing the result of the query provided in `PyRFC.query()`. + pd.DataFrame: A DataFrame representing the result of the query provided in + `PyRFC.query()`. """ - params = self._query columns = self.select_columns_aliased sep = self._query.get("DELIMITER") @@ -1178,8 +1074,8 @@ def to_df(self, tests: dict = None): logger.info(f"Data will be downloaded in {len(fields_lists)} chunks.") func = self.func if sep is None: - # automatically find a working separator - SEPARATORS = [ + # Automatically find a working separator. 
+ separators = [ "|", "/t", "#", @@ -1194,12 +1090,13 @@ def to_df(self, tests: dict = None): "$", ] else: - SEPARATORS = [sep] + separators = [sep] - for sep in SEPARATORS: + for sep in separators: logger.info(f"Checking if separator '{sep}' works.") if isinstance(self.rfc_unique_id[0], str): - # columns only for the first chunk and we add the rest later to avoid name conflicts + # Columns only for the first chunk. We add the rest later to avoid name + # conflicts. df = pd.DataFrame(columns=fields_lists[0]) else: df = pd.DataFrame() @@ -1213,45 +1110,44 @@ def to_df(self, tests: dict = None): response = self.call(func, **params) except ABAPApplicationError as e: if e.key == "DATA_BUFFER_EXCEEDED": - raise DataBufferExceeded( - "Character limit per row exceeded. Please select fewer columns." - ) - else: - raise e - # Check and skip if there is no data returned + msg = "Character limit per row exceeded. Please select fewer columns." + raise DataBufferExceededError(msg) from e + raise + # Check and skip if there is no data returned. if response["DATA"]: record_key = "WA" data_raw = np.array(response["DATA"]) del response - # if the reference columns are provided not necessary to remove any extra row. + # If reference columns are provided, it's not necessary to remove + # any extra row. if not isinstance(self.rfc_unique_id[0], str): - row_index, data_raw, start = detect_extra_rows( + row_index, data_raw, start = _detect_extra_rows( row_index, data_raw, chunk, fields ) else: start = False - records = [row for row in gen_split(data_raw, sep, record_key)] + records = list(_gen_split(data_raw, sep, record_key)) del data_raw if ( isinstance(self.rfc_unique_id[0], str) - and not list(df.columns) == fields + and list(df.columns) != fields ): df_tmp = pd.DataFrame(columns=fields) df_tmp[fields] = records - # SAP adds whitespaces to the first extracted column value - # If whitespace is in unique column it must be removed to make a proper merge + # SAP adds whitespaces to the first extracted column value. + # If whitespace is in unique column, it must be removed to make + # a proper merge. 
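The chunk-merging path above is easier to see with toy data; `MATNR` is just an example unique column, and the whitespace stripping mirrors the step performed before the outer merge.

```python
import pandas as pd

# Two column chunks downloaded separately, both carrying the unique key MATNR.
chunk_1 = pd.DataFrame({"MATNR": ["1000 ", "1001"], "MAKTX": ["Widget", "Gadget"]})
chunk_2 = pd.DataFrame({"MATNR": ["1000", "1001 "], "WERKS": ["DE01", "PL02"]})

# SAP may pad the extracted key with whitespace, so it is stripped on both sides
# before merging.
for frame in (chunk_1, chunk_2):
    frame["MATNR"] = frame["MATNR"].str.strip()

merged = pd.merge(chunk_1, chunk_2, on=["MATNR"], how="outer")
print(merged)  # one row per MATNR, with MAKTX and WERKS side by side
```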
for col in self.rfc_unique_id: df_tmp[col] = df_tmp[col].str.strip() df[col] = df[col].str.strip() df = pd.merge(df, df_tmp, on=self.rfc_unique_id, how="outer") + elif not start: + df[fields] = records else: - if not start: - df[fields] = records - else: - df[fields] = np.nan + df[fields] = np.nan chunk += 1 elif not response["DATA"]: logger.warning("No data returned from SAP.") diff --git a/src/viadot/sources/sharepoint.py b/src/viadot/sources/sharepoint.py index d4f05f702..05ec522ef 100644 --- a/src/viadot/sources/sharepoint.py +++ b/src/viadot/sources/sharepoint.py @@ -1,19 +1,27 @@ +"""Sharepoint API connector.""" + import io -import os -from typing import Literal, Optional, Union +from pathlib import Path +import re +from typing import Any, Literal from urllib.parse import urlparse import pandas as pd -import sharepy from pandas._libs.parsers import STR_NA_VALUES from pydantic import BaseModel, root_validator +import sharepy from sharepy.errors import AuthError from viadot.config import get_source_credentials from viadot.exceptions import CredentialError from viadot.signals import SKIP from viadot.sources.base import Source -from viadot.utils import add_viadot_metadata_columns, cleanup_df, validate +from viadot.utils import ( + add_viadot_metadata_columns, + cleanup_df, + validate, + validate_and_reorder_dfs_columns, +) class SharepointCredentials(BaseModel): @@ -22,95 +30,63 @@ class SharepointCredentials(BaseModel): password: str # Sharepoint password @root_validator(pre=True) - def is_configured(cls, credentials): + def is_configured(cls, credentials: dict): # noqa: N805, ANN201, D102 site = credentials.get("site") username = credentials.get("username") password = credentials.get("password") if not (site and username and password): - raise CredentialError( - "'site', 'username', and 'password' credentials are required." - ) + msg = "'site', 'username', and 'password' credentials are required." + raise CredentialError(msg) return credentials -def get_last_segment_from_url( - url: str, -) -> tuple[str, Literal["file"]] | tuple[str, Literal["directory"]]: - """ - Get the last part of the URL and determine if it represents a file or directory. - - This function parses the provided URL, extracts the last segment, and identifies - whether it corresponds to a file (based on the presence of a file extension) - or a directory. - - Args: - url (str): The URL to a SharePoint file or directory. - - Raises: - ValueError: If an invalid URL is provided. - - Returns: - tuple: A tuple where the first element is the last part of the URL (file extension - or folder name) and the second element is a string indicating the type: - - If a file URL is provided, returns (file extension, 'file'). - - If a folder URL is provided, returns (last folder name, 'directory'). - """ - path_parts = urlparse(url).path.split("/") - # Filter out empty parts - non_empty_parts = [part for part in path_parts if part] - - # Check if the last part has a file extension - if non_empty_parts: - last_part = non_empty_parts[-1] - _, extension = os.path.splitext(last_part) - if extension: - return extension, "file" - else: - return last_part, "directory" - else: - raise ValueError("Incorrect URL provided : '{url}'") - - class Sharepoint(Source): - """ - Download Excel files from Sharepoint. - - Args: - credentials (SharepointCredentials): Sharepoint credentials. - config_key (str, optional): The key in the viadot config holding relevant credentials. 
- """ - - DEFAULT_NA_VALUES = list(STR_NA_VALUES) + DEFAULT_NA_VALUES = tuple(STR_NA_VALUES) def __init__( self, credentials: SharepointCredentials = None, - config_key: Optional[str] = None, + config_key: str | None = None, *args, **kwargs, ): + """Download Excel files from Sharepoint. + + Args: + credentials (SharepointCredentials): Sharepoint credentials. + config_key (str, optional): The key in the viadot config holding relevant + credentials. + """ raw_creds = credentials or get_source_credentials(config_key) or {} validated_creds = dict(SharepointCredentials(**raw_creds)) super().__init__(*args, credentials=validated_creds, **kwargs) def get_connection(self) -> sharepy.session.SharePointSession: + """Establishe a connection to SharePoint. + + Returns: + sharepy.session.SharePointSession: A session object representing + the authenticated connection. + + Raises: + CredentialError: If authentication to SharePoint fails due to incorrect + credentials. + """ try: connection = sharepy.connect( site=self.credentials.get("site"), username=self.credentials.get("username"), password=self.credentials.get("password"), ) - except AuthError: + except AuthError as e: site = self.credentials.get("site") - raise CredentialError( - f"Could not authenticate to {site} with provided credentials." - ) + msg = f"Could not authenticate to {site} with provided credentials." + raise CredentialError(msg) from e return connection def download_file(self, url: str, to_path: list | str) -> None: - """ - Download a file from Sharepoint. + """Download a file from Sharepoint to specific location. Args: url (str): The URL of the file to be downloaded. @@ -129,117 +105,274 @@ def download_file(self, url: str, to_path: list | str) -> None: ) conn.close() - def _download_excel(self, url: str, **kwargs) -> pd.ExcelFile: - endpoint_value, endpoint_type = get_last_segment_from_url(url) + def scan_sharepoint_folder(self, url: str) -> list[str]: + """Scan Sharepoint folder to get all file URLs of all files within it. + + Args: + url (str): The URL of the folder to scan. + + Raises: + ValueError: If the provided URL does not contain the expected '/sites/' + segment. + + Returns: + list[str]: List of URLs pointing to each file within the specified + SharePoint folder. + """ + conn = self.get_connection() + + parsed_url = urlparse(url) + path_parts = parsed_url.path.split("/") + if "sites" in path_parts: + site_index = ( + path_parts.index("sites") + 2 + ) # +2 to include 'sites' and the next segment + site_url = f"{parsed_url.scheme}://{parsed_url.netloc}{'/'.join(path_parts[:site_index])}" + library = "/".join(path_parts[site_index:]) + else: + message = "URL does not contain '/sites/' segment." + raise ValueError(message) + + # -> site_url = company.sharepoint.com/sites/site_name/ + # -> library = /shared_documents/folder/sub_folder/final_folder + endpoint = ( + f"{site_url}/_api/web/GetFolderByServerRelativeUrl('{library}')/Files" + ) + response = conn.get(endpoint) + files = response.json().get("d", {}).get("results", []) + + return [f'{site_url}/{library}{file["Name"]}' for file in files] + + def _get_file_extension(self, url: str) -> str: + """Extracts the file extension from a given URL. + + Parameters: + url (str): The URL from which to extract the file extension. + + Returns: + str: The file extension, including the leading dot (e.g., '.xlsx'). 
+ """ + # Parse the URL to get the path + parsed_url = urlparse(url) + return Path(parsed_url.path).suffix + + def _download_file_stream(self, url: str, **kwargs) -> pd.ExcelFile: + """Download the contents of a file from SharePoint. + + Returns the data as an in-memory byte stream. + + Args: + url (str): The URL of the file to download. + + Returns: + io.BytesIO: An in-memory byte stream containing the file content. + """ if "nrows" in kwargs: - raise ValueError("Parameter 'nrows' is not supported.") + msg = "Parameter 'nrows' is not supported." + raise ValueError(msg) + conn = self.get_connection() - if endpoint_type == "file": - if endpoint_value != ".xlsx": - raise ValueError( - "Only Excel files with 'XLSX' extension can be loaded into a DataFrame." - ) - self.logger.info(f"Downloading data from {url}...") - response = conn.get(url) - bytes_stream = io.BytesIO(response.content) - return pd.ExcelFile(bytes_stream) + self.logger.info(f"Downloading data from {url}...") + response = conn.get(url) + bytes_stream = io.BytesIO(response.content) + + return pd.ExcelFile(bytes_stream) + + def _is_file(self, url: str) -> bool: + """Determines whether a provided URL points to a file based on its structure. + + This function uses a regular expression to check if the URL ends with a + common file extension. It does not make any network requests and purely + relies on the URL structure for its determination. + + Parameters: + url (str): The URL to be checked. + + Returns: + bool: True if the URL is identified as a file based on its extension, + False otherwise. + + Example: + >>> _is_file("https://example.com/file.xlsx") + True + >>> _is_file("https://example.com/folder/") + False + >>> _is_file("https://example.com/folder") + False + """ + # Regular expression for matching file extensions + file_extension_pattern = re.compile(r"\.[a-zA-Z0-9]+$") + + return bool(file_extension_pattern.search(url)) - def _convert_all_to_string_type(self, df: pd.DataFrame) -> pd.DataFrame: - """Convert all column data types in the DataFrame to strings. + def _handle_multiple_files( + self, + url: str, + file_sheet_mapping: dict, + na_values: list[str] | None = None, + **kwargs, + ): + """Handle downloading and parsing multiple Excel files from a SharePoint folder. + + Args: + url (str): The base URL of the SharePoint folder containing the files. + file_sheet_mapping (dict): A dictionary mapping file names to sheet names + or indexes. The keys are file names, and the values are sheet + names/indices. + na_values (Optional[list[str]]): Additional strings to recognize as NA/NaN. + + Returns: + pd.DataFrame: A concatenated DataFrame containing the data from all + specified files and sheets. + + Raises: + ValueError: If the file extension is not supported. + """ + dfs = [ + self._load_and_parse( + file_url=url + file, sheet_name=sheet, na_values=na_values, **kwargs + ) + for file, sheet in file_sheet_mapping.items() + ] + return pd.concat(validate_and_reorder_dfs_columns(dfs)) - This method converts all the values in the DataFrame to strings, - handling NaN values by replacing them with None. + def _load_and_parse( + self, + file_url: str, + sheet_name: str | list[str] | None = None, + na_values: list[str] | None = None, + **kwargs, + ): + """Loads and parses an Excel file from a URL. Args: - df (pd.DataFrame): DataFrame to convert. + file_url (str): The URL of the file to download and parse. + sheet_name (Optional[Union[str, list[str]]]): The name(s) or index(es) of + the sheet(s) to parse. If None, all sheets are parsed. 
+ na_values (Optional[list[str]]): Additional strings to recognize as NA/NaN. + **kwargs: Additional keyword arguments to pass to the pandas read function. Returns: - pd.DataFrame: DataFrame with all data types converted to string. - Columns that contain only None values are also - converted to string type. + pd.DataFrame: The parsed data as a pandas DataFrame. + + Raises: + ValueError: If the file extension is not supported. """ - df_converted = df.astype(str).where(pd.notnull(df), None) - return self._empty_column_to_string(df=df_converted) + file_extension = self._get_file_extension(file_url) + file_stream = self._download_file_stream(file_url) - def _empty_column_to_string(self, df: pd.DataFrame) -> pd.DataFrame: - """Convert the type of columns containing only None values to string. + if file_extension == ".xlsx": + return self._parse_excel(file_stream, sheet_name, na_values, **kwargs) + msg = "Only Excel (.xlsx) files can be loaded into a DataFrame." + raise ValueError(msg) - This method iterates through the DataFrame columns and converts the - type of any column that contains only None values to string. + def _parse_excel( + self, + excel_file: pd.ExcelFile, + sheet_name: str | list[str] | None = None, + na_values: list[str] | None = None, + **kwargs, + ): + """Parses an Excel file into a DataFrame. Casts all columns to string. Args: - df (pd.DataFrame): DataFrame to convert. + excel_file: An ExcelFile object containing the data to parse. + sheet_name (Optional[Union[str, list[str]]]): The name(s) or index(es) of + the sheet(s) to parse. If None, all sheets are parsed. + na_values (Optional[list[str]]): Additional strings to recognize as NA/NaN. + **kwargs: Additional keyword arguments to pass to the pandas read function. Returns: - pd.DataFrame: Updated DataFrame with columns containing only - None values converted to string type. All columns - in the returned DataFrame will be of type object/string. + pd.DataFrame: The parsed data as a pandas DataFrame. """ - for col in df.columns: - if df[col].isnull().all(): - df[col] = df[col].astype("string") - return df + return pd.concat( + [ + excel_file.parse( + sheet, + keep_default_na=False, + na_values=na_values or list(self.DEFAULT_NA_VALUES), + dtype=str, # Ensure all columns are read as strings + **kwargs, + ) + for sheet in ([sheet_name] if sheet_name else excel_file.sheet_names) + ] + ) @add_viadot_metadata_columns def to_df( self, url: str, - sheet_name: Optional[Union[str, list, int]] = None, - if_empty: str = "warn", - tests: dict = {}, + sheet_name: str | list[str] | None = None, + if_empty: Literal["warn", "skip", "fail"] = "warn", + tests: dict[str, Any] | None = None, + file_sheet_mapping: dict[str, str | int | list[str]] | None = None, na_values: list[str] | None = None, **kwargs, ) -> pd.DataFrame: - """ - Load an Excel file into a pandas DataFrame. + """Load an Excel file or files from a SharePoint URL into a pandas DataFrame. + + This method handles downloading the file(s), parsing the content, and converting + it into a pandas DataFrame. It supports both single file URLs and folder URLs + with multiple files. Args: url (str): The URL of the file to be downloaded. - sheet_name (Optional[Union[str, list, int]], optional): Strings are used for sheet names. - Integers are used in zero-indexed sheet positions (chart sheets do not count - as a sheet position). Lists of strings/integers are used to request multiple sheets. - Specify None to get all worksheets. Defaults to None. 
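A self-contained illustration of what `_parse_excel()` does (all sheets, strings only, custom NA markers), using an in-memory workbook instead of a SharePoint download; it assumes `openpyxl` is installed.

```python
import io

import pandas as pd

# Build a small two-sheet workbook in memory to stand in for a downloaded file.
buffer = io.BytesIO()
with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
    pd.DataFrame({"id": ["1"], "value": ["10"]}).to_excel(writer, sheet_name="Q1", index=False)
    pd.DataFrame({"id": ["2"], "value": ["N/A"]}).to_excel(writer, sheet_name="Q2", index=False)

excel_file = pd.ExcelFile(io.BytesIO(buffer.getvalue()))

# Parse every sheet as strings, honour the custom NA markers, and concatenate.
df = pd.concat(
    excel_file.parse(sheet, keep_default_na=False, na_values=["N/A"], dtype=str)
    for sheet in excel_file.sheet_names
)
print(df)
```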
- if_empty (str, optional): What to do if the file is empty. Defaults to "warn". - tests (Dict[str], optional): A dictionary with optional list of tests + sheet_name (Optional[Union[str, list, int]], optional): Strings are used for + sheet names. Integers are used in zero-indexed sheet positions (chart + sheets do not count as a sheet position). Lists of strings/integers are + used to request multiple sheets. Specify None to get all worksheets. + Defaults to None. + if_empty (Literal["warn", "skip", "fail"], optional): Action to take if + the DataFrame is empty. + - "warn": Logs a warning. + - "skip": Skips the operation. + - "fail": Raises an error. + Defaults to "warn". + tests (Dict[str, Any], optional): A dictionary with optional list of tests to verify the output dataframe. If defined, triggers the `validate` function from utils. Defaults to None. - na_values (list[str] | None): Additional strings to recognize as NA/NaN. - If list passed, the specific NA values for each column will be recognized. - Defaults to None. - If None then the "DEFAULT_NA_VALUES" is assigned list(" ", "#N/A", "#N/A N/A", - "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND", "1.#QNAN", - "", "N/A", "NA", "NULL", "NaN", "None", "n/a", "nan", "null"). - If list passed, the specific NA values for each column will be recognized. - Defaults to None. - kwargs (dict[str, Any], optional): Keyword arguments to pass to pd.ExcelFile.parse(). Note that - `nrows` is not supported. + file_sheet_mapping (dict[str, Union[str, int, list[str]]], optional): + Mapping of file names to sheet names or indices. The keys are file names + and the values are sheet names/indices. Used when multiple files are + involved. Defaults to None. + na_values (list[str], optional): Additional strings to recognize as NA/NaN. + If list passed, the specific NA values for each column will be + recognized. Defaults to None. + kwargs (dict[str, Any], optional): Keyword arguments to pass to + pd.ExcelFile.parse(). Note that `nrows` is not supported. Returns: pd.DataFrame: The resulting data as a pandas DataFrame. - """ - excel_file = self._download_excel(url=url, **kwargs) - if sheet_name: - df = excel_file.parse( - sheet_name=sheet_name, - keep_default_na=False, - na_values=na_values or self.DEFAULT_NA_VALUES, + Raises: + ValueError: If the file extension is not supported or if `if_empty` is set + to "fail" and the DataFrame is empty. + SKIP: If `if_empty` is set to "skip" and the DataFrame is empty. 
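A hedged usage sketch of the three code paths described above (single file, explicit `file_sheet_mapping`, whole folder); the import path, config key, site URL, file names, and sheet names are all placeholders.

```python
from viadot.sources import Sharepoint  # assumed import path

sharepoint = Sharepoint(config_key="sharepoint")  # hypothetical config key

# Single file: point straight at the .xlsx URL.
df_single = sharepoint.to_df(
    url="https://company.sharepoint.com/sites/analytics/Shared Documents/report.xlsx",
    sheet_name="Summary",
)

# Folder with an explicit mapping of files to sheets...
df_mapped = sharepoint.to_df(
    url="https://company.sharepoint.com/sites/analytics/Shared Documents/reports/",
    file_sheet_mapping={"sales_2023.xlsx": "Summary", "sales_2024.xlsx": 0},
)

# ...or the whole folder, loading every .xlsx file found in it.
df_all = sharepoint.to_df(
    url="https://company.sharepoint.com/sites/analytics/Shared Documents/reports/",
)
```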
+ """ + if self._is_file(url): + df = self._load_and_parse( + file_url=url, sheet_name=sheet_name, na_values=na_values, **kwargs + ) + elif file_sheet_mapping: + df = self._handle_multiple_files( + url=url, + file_sheet_mapping=file_sheet_mapping, + na_values=na_values, **kwargs, ) - df["sheet_name"] = sheet_name else: - sheets: list[pd.DataFrame] = [] - for sheet_name in excel_file.sheet_names: - sheet = excel_file.parse( + list_of_urls = self.scan_sharepoint_folder(url) + dfs = [ + self._load_and_parse( + file_url=file_url, sheet_name=sheet_name, - keep_default_na=False, - na_values=na_values or self.DEFAULT_NA_VALUES, + na_values=na_values, **kwargs, ) - sheet["sheet_name"] = sheet_name - sheets.append(sheet) - df = pd.concat(sheets) + for file_url in list_of_urls + ] + df = pd.concat(validate_and_reorder_dfs_columns(dfs)) if df.empty: try: @@ -247,11 +380,11 @@ def to_df( except SKIP: return pd.DataFrame() else: - self.logger.info(f"Successfully downloaded {len(df)} of data.") + self.logger.info(f"Successfully downloaded {len(df)} rows of data.") df_clean = cleanup_df(df) if tests: validate(df=df_clean, tests=tests) - return self._convert_all_to_string_type(df=df_clean) + return df_clean diff --git a/src/viadot/sources/sql_server.py b/src/viadot/sources/sql_server.py new file mode 100644 index 000000000..4d866d8fb --- /dev/null +++ b/src/viadot/sources/sql_server.py @@ -0,0 +1,117 @@ +"""SQL Server source class.""" + +from datetime import datetime, timedelta, timezone +import struct + +from pydantic import BaseModel, SecretStr + +from viadot.config import get_source_credentials +from viadot.sources.base import SQL + + +class SQLServerCredentials(BaseModel): + user: str + password: str | SecretStr | None = None + server: str + driver: str = "ODBC Driver 17 for SQL Server" + db_name: str | None = None + + +class SQLServer(SQL): + DEFAULT_SCHEMA = "dbo" + + def __init__( + self, + credentials: SQLServerCredentials | None = None, + config_key: str | None = None, + *args, + **kwargs, + ): + """Connector for SQL Server. + + Args: + credentials (SQLServerCredentials | None, optional): The credentials to use. + Defaults to None. + config_key (str | None, optional): The viadot config key from which to read + the credentials. Defaults to None. + """ + raw_creds = credentials or get_source_credentials(config_key) or {} + validated_creds = SQLServerCredentials(**raw_creds).dict( + by_alias=True + ) # validate the credentials + + super().__init__(*args, credentials=validated_creds, **kwargs) + self.server = self.credentials.get("server") + self.username = self.credentials.get("username") + self.password = self.credentials.get("password") + self.driver = self.credentials.get("driver") + self.db_name = self.credentials.get("db_name") + + self.con.add_output_converter(-155, self._handle_datetimeoffset) + + @property + def schemas(self) -> list[str]: + """Return a list of all schemas.""" + schemas_tuples = self.run("SELECT s.name as schema_name from sys.schemas s") + return [schema_tuple[0] for schema_tuple in schemas_tuples] + + @property + def tables(self) -> list[str]: + """Return a list of all tables in the database.""" + tables_tuples = self.run( + "SELECT schema_name(t.schema_id), t.name FROM sys.tables t" + ) + return [".".join(row) for row in tables_tuples] + + @staticmethod + def _handle_datetimeoffset(dto_value: str) -> datetime: + """Adds support for SQL Server's custom `datetimeoffset` type. + + This type is not handled natively by ODBC/pyodbc. 
+ + See: https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794 + """ + ( + year, + month, + day, + hour, + minute, + second, + nanoseconds, + offset_hours, + offset_minutes, + ) = struct.unpack("<6hI2h", dto_value) + return datetime( + year, + month, + day, + hour, + minute, + second, + nanoseconds // 1000, + tzinfo=timezone(timedelta(hours=offset_hours, minutes=offset_minutes)), + ) + + def exists(self, table: str, schema: str | None = None) -> bool: + """Check whether a table exists. + + Args: + table (str): The table to be checked. + schema (str, optional): The schema where the table is located. + Defaults to 'dbo'. + + Returns: + bool: Whether the table exists. + """ + if not schema: + schema = self.DEFAULT_SCHEMA + + list_table_info_query = f""" + SELECT * + FROM sys.tables t + JOIN sys.schemas s + ON t.schema_id = s.schema_id + WHERE s.name = '{schema}' AND t.name = '{table}' + """ # noqa: S608 + return bool(self.run(list_table_info_query)) diff --git a/src/viadot/sources/sqlite.py b/src/viadot/sources/sqlite.py index de8f498d3..b6d0d2dd1 100644 --- a/src/viadot/sources/sqlite.py +++ b/src/viadot/sources/sqlite.py @@ -1,20 +1,16 @@ +"""SQLite source connector.""" + from viadot.sources.base import SQL class SQLite(SQL): - """A SQLite source - - Args: - server (str): server string, usually localhost - db (str): the file path to the db e.g. /home/somedb.sqlite - """ - def __init__( self, query_timeout: int = 60, *args, **kwargs, ): + """SQLite connector.""" super().__init__( *args, driver="/usr/lib/x86_64-linux-gnu/odbc/libsqlite3odbc.so", @@ -24,9 +20,10 @@ def __init__( self.credentials["server"] = "localhost" @property - def conn_str(self): + def conn_str(self) -> str: """Generate a connection string from params or config. - Note that the user and password are escapedd with '{}' characters. + + Note that the user and password are escaped with '{}' characters. Returns: str: The ODBC connection string. @@ -35,19 +32,17 @@ def conn_str(self): server = self.credentials["server"] db_name = self.credentials["db_name"] - conn_str = f"DRIVER={{{driver}}};SERVER={server};DATABASE={db_name};" - - return conn_str + return f"DRIVER={{{driver}}};SERVER={server};DATABASE={db_name};" - def _check_if_table_exists(self, table: str, schema: str = None) -> bool: + def _check_if_table_exists(self, table: str, schema: str | None = None) -> bool: """Checks if table exists. + Args: table (str): Table name. schema (str, optional): Schema name. Defaults to None. 
""" fqn = f"{schema}.{table}" if schema is not None else table exists_query = ( - f"SELECT name FROM sqlite_master WHERE type='table' AND name='{fqn}'" + f"SELECT name FROM sqlite_master WHERE type='table' AND name='{fqn}'" # noqa: S608 ) - exists = bool(self.run(exists_query)) - return exists + return bool(self.run(exists_query)) diff --git a/src/viadot/sources/trino.py b/src/viadot/sources/trino.py index dfc27c90d..d32cf960b 100644 --- a/src/viadot/sources/trino.py +++ b/src/viadot/sources/trino.py @@ -1,13 +1,16 @@ +"""A module for interacting with Trino as a database.""" + +from collections.abc import Generator +from contextlib import contextmanager import re +from typing import Literal import warnings -from contextlib import contextmanager -from typing import Generator, Literal, Optional import pandas as pd import pyarrow as pa from pydantic import BaseModel, Field from sqlalchemy import create_engine, text -from sqlalchemy.engine import Connection +from sqlalchemy.engine import Connection, CursorResult from sqlalchemy.exc import SADeprecationWarning from trino.auth import BasicAuthentication @@ -15,38 +18,41 @@ from viadot.sources.base import Source from viadot.utils import get_fqn + # Triggered by trino lib warnings.filterwarnings("ignore", category=SADeprecationWarning) class TrinoCredentials(BaseModel): + """Trino credentials.""" + http_scheme: str = "https" host: str = "localhost" port: int = 443 user: str - password: Optional[str] = None + password: str | None = None catalog: str - schema_name: Optional[str] = Field(None, alias="schema") + schema_name: str | None = Field(None, alias="schema") verify: bool = True class Trino(Source): - """ - A class for interacting with Trino as a database. Currently supports only generic - and Iceberg operations. - - Args: - credentials (TrinoCredentials): Trino credentials. - config_key (str, optional): The key in the viadot config holding relevant credentials. - """ - def __init__( self, - credentials: TrinoCredentials = None, - config_key: str = None, + credentials: TrinoCredentials | None = None, + config_key: str | None = None, *args, **kwargs, ): + """A class for interacting with Trino as a database. + + Currently supports only generic and Iceberg operations. + + Args: + credentials (TrinoCredentials): Trino credentials. + config_key (str, optional): The key in the viadot config holding relevant + credentials. + """ raw_creds = credentials or get_source_credentials(config_key) or {} validated_creds = TrinoCredentials(**raw_creds).dict( by_alias=True @@ -76,12 +82,10 @@ def __init__( ) @contextmanager - def get_connection(self): + def get_connection(self) -> Generator[Connection, None, None]: """Provide a transactional scope around a series of operations. - ---- Examples: - >>> trino = Trino() >>> with trino.get_connection() as connection: >>> trino.run(query1, connection=connection) @@ -98,11 +102,25 @@ def get_connection(self): connection.close() def get_tables(self, schema_name: str) -> list[str]: + """List all tables in a schema. + + Args: + schema_name (str): _description_ + + Returns: + list[str]: _description_ + """ query = f"SHOW TABLES FROM {schema_name}" with self.get_connection() as connection: return list(self.run(query, connection=connection)) - def drop_table(self, table_name: str, schema_name: str = None) -> None: + def drop_table(self, table_name: str, schema_name: str | None = None) -> None: + """Drop a table. + + Args: + table_name (str): _description_ + schema_name (str | None, optional): _description_. Defaults to None. 
+ """ fqn = get_fqn(schema_name=schema_name, table_name=table_name) query = f"DROP TABLE IF EXISTS {fqn}" @@ -111,9 +129,15 @@ def drop_table(self, table_name: str, schema_name: str = None) -> None: self.run(query, connection=connection) self.logger.info(f"Table '{fqn}' has been successfully dropped.") - def delete_table(self, table_name: str, schema_name: str = None) -> None: + def delete_table(self, table_name: str, schema_name: str | None = None) -> None: + """Delete all data from a table. + + Args: + table_name (str): _description_ + schema_name (str | None, optional): _description_. Defaults to None. + """ fqn = get_fqn(schema_name=schema_name, table_name=table_name) - query = f"DELETE FROM {fqn}" + query = f"DELETE FROM {fqn}" # noqa: S608 self.logger.info(f"Removing all data from table '{fqn}'...") with self.get_connection() as connection: self.run(query, connection=connection) @@ -130,17 +154,28 @@ def _check_if_table_exists(self, table_name: str, schema_name: str) -> None: return len(results) > 0 def get_schemas(self) -> list[str]: + """List all schemas in the database. + + Returns: + list[str]: _description_ + """ query = "SHOW SCHEMAS" with self.get_connection() as connection: return list(self.run(query, connection=connection)) def _check_if_schema_exists(self, schema_name: str) -> None: - query = f"SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = '{schema_name}'" + query = f"SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = '{schema_name}'" # noqa: S608 with self.get_connection() as connection: results = list(self.run(query, connection=connection)) return bool(results) def drop_schema(self, schema_name: str, cascade: bool = False) -> None: + """Drop a schema. + + Args: + schema_name (str): _description_ + cascade (bool, optional): _description_. Defaults to False. + """ if not self._check_if_schema_exists(schema_name): return @@ -159,11 +194,24 @@ def create_iceberg_schema( location: str, if_exists: Literal["fail", "skip"] = "fail", ) -> None: + """Create an Iceberg schema. + + Args: + schema_name (str): _description_ + location (str): _description_ + if_exists (Literal["fail", "skip"], optional): What + to do if the schema already exists. Defaults to "fail". + + Raises: + ValueError: _description_ + """ exists = self._check_if_schema_exists(schema_name) - if exists and if_exists == "fail": - raise ValueError(f"Schema '{schema_name}' already exists.") - else: + if exists: + if if_exists == "fail": + msg = f"Schema '{schema_name}' already exists." + raise ValueError(msg) + self.logger.info(f"Schema '{schema_name}' already exists. Skipping...") return @@ -182,10 +230,23 @@ def create_iceberg_table_from_arrow( table_name: str, schema_name: str | None = None, location: str | None = None, - format: Literal["PARQUET", "ORC"] = "PARQUET", + file_format: Literal["PARQUET", "ORC"] = "PARQUET", partition_cols: list[str] | None = None, sort_by: list[str] | None = None, ) -> None: + """Create an Iceberg table from a pyarrow Table. + + Args: + table (pa.Table): _description_ + table_name (str): _description_ + schema_name (str | None, optional): _description_. Defaults to None. + location (str | None, optional): _description_. Defaults to None. + file_format (Literal["PARQUET", "ORC"], optional): The + file format to use. Defaults to "PARQUET". + partition_cols (list[str] | None, optional): The partition columns to use. + Defaults to None. + sort_by (list[str] | None, optional): _description_. Defaults to None. 
+ """ columns = table.schema.names types = [self.pyarrow_to_trino_type(str(typ)) for typ in table.schema.types] create_table_query = self._create_table_query( @@ -194,7 +255,7 @@ def create_iceberg_table_from_arrow( columns=columns, types=types, location=location, - format=format, + file_format=file_format, partition_cols=partition_cols, sort_by_cols=sort_by, ) @@ -209,19 +270,32 @@ def create_iceberg_table_from_pandas( self, df: pd.DataFrame, table_name: str, - schema_name: str = None, - location: str = None, - format: Literal["PARQUET", "ORC"] = "PARQUET", - partition_cols: list[str] = None, - sort_by: list[str] = None, + schema_name: str | None = None, + location: str | None = None, + file_format: Literal["PARQUET", "ORC"] = "PARQUET", + partition_cols: list[str] | None = None, + sort_by: list[str] | None = None, ) -> None: + """Create an Iceberg table from a pandas DataFrame. + + Args: + df (pd.DataFrame): _description_ + table_name (str): _description_ + schema_name (str | None, optional): _description_. Defaults to None. + location (str | None, optional): _description_. Defaults to None. + file_format (Literal["PARQUET", "ORC"], optional): The + file format to use. Defaults to "PARQUET". + partition_cols (list[str] | None, optional): The partition columns to use. + Defaults to None. + sort_by (list[str] | None, optional): _description_. Defaults to None. + """ pa_table = pa.Table.from_pandas(df) self.create_iceberg_table_from_arrow( table=pa_table, schema_name=schema_name, table_name=table_name, location=location, - format=format, + file_format=file_format, partition_cols=partition_cols, sort_by=sort_by, ) @@ -231,17 +305,17 @@ def _create_table_query( table_name: str, columns: list[str], types: list[str], - schema_name: str = None, - location: str = None, - format: Literal["PARQUET", "ORC"] = "PARQUET", - partition_cols: list[str] = None, - sort_by_cols: list[str] = None, - ): + schema_name: str | None = None, + location: str | None = None, + file_format: Literal["PARQUET", "ORC"] = "PARQUET", + partition_cols: list[str] | None = None, + sort_by_cols: list[str] | None = None, + ) -> str: cols_and_dtypes = ",\n\t".join( - col + " " + dtype for col, dtype in zip(columns, types) + col + " " + dtype for col, dtype in zip(columns, types, strict=False) ) fqn = get_fqn(schema_name=schema_name, table_name=table_name) - with_clause = f"format = '{format}'" + with_clause = f"format = '{file_format}'" if partition_cols: with_clause += ",\n\tpartitioning = ARRAY" + str(partition_cols) @@ -252,7 +326,7 @@ def _create_table_query( if location: with_clause += f",\n\tlocation = '{location}'" - query = f""" + return f""" CREATE TABLE IF NOT EXISTS {fqn} ( {cols_and_dtypes} ) @@ -260,12 +334,20 @@ def _create_table_query( {with_clause} )""" - return query - def run( self, sql: str, connection: Connection ) -> Generator[tuple, None, None] | None: - def row_generator(result): + """Run a SQL query. + + Args: + sql (str): _description_ + connection (Connection): _description_ + + Yields: + Generator[tuple, None, None] | None: _description_ + """ + + def row_generator(result: CursorResult): # Fetch rows in chunks of size `yield_per`. # This has to be inside a function due to how Python generators work. for partition in result.partitions(): @@ -277,16 +359,24 @@ def row_generator(result): # Execute with server-side cursor of size 5000. 
result = connection.execution_options(yield_per=5000).execute(text(sql)) except Exception as e: - raise ValueError(f"Failed executing SQL:\n{sql}") from e + msg = f"Failed executing SQL:\n{sql}" + raise ValueError(msg) from e query_keywords = ["SELECT", "SHOW", "PRAGMA", "WITH"] is_query = any(sql.strip().upper().startswith(word) for word in query_keywords) - if is_query: - return row_generator(result) + return row_generator(result) if is_query else None @staticmethod def pyarrow_to_trino_type(pyarrow_type: str) -> str: + """Convert a pyarrow data type to a Trino type. + + Args: + pyarrow_type (str): The Pyarrow type to convert. + + Returns: + str: The Trino type. + """ mapping = { "string": "VARCHAR", "large_string": "VARCHAR", @@ -317,9 +407,10 @@ def pyarrow_to_trino_type(pyarrow_type: str) -> str: return mapped_type - def _check_connection(self): + def _check_connection(self) -> None: try: with self.get_connection() as connection: self.run("select 1", connection=connection) except Exception as e: - raise ValueError(f"Failed to connect to Trino server at {self.host}") from e + msg = f"Failed to connect to Trino server at {self.host}" + raise ValueError(msg) from e diff --git a/src/viadot/sources/uk_carbon_intensity.py b/src/viadot/sources/uk_carbon_intensity.py index 7594c9949..e5111ec28 100644 --- a/src/viadot/sources/uk_carbon_intensity.py +++ b/src/viadot/sources/uk_carbon_intensity.py @@ -1,3 +1,5 @@ +"""UK Carbon Intensity connector.""" + import pandas as pd import requests @@ -6,35 +8,33 @@ class UKCarbonIntensity(Source): - """Fetches data of Carbon Intensity of the UK Power Grid. + def __init__(self, *args, api_url: str | None = None, **kwargs): + """Fetch data of Carbon Intensity of the UK Power Grid. - Documentation for this source API is located - at: https://carbon-intensity.github.io/api-definitions/#carbon-intensity-api-v2-0-0 + Documentation for this source API is located + at: https://carbon-intensity.github.io/api-definitions/#carbon-intensity-api-v2-0-0 - Parameters - ---------- - api_url : str, optional + Parameters + ---------- + api_url : str, optional The URL endpoint to call, by default None - """ - - def __init__(self, *args, api_url: str = None, **kwargs): + """ super().__init__(*args, **kwargs) self.api_url = api_url self.API_ENDPOINT = "https://api.carbonintensity.org.uk" def to_json(self) -> dict: - """Creates json file""" + """Creates json file.""" url = f"{self.API_ENDPOINT}{self.api_url}" headers = {"Accept": "application/json"} - response = requests.get(url, params={}, headers=headers) + response = requests.get(url, params={}, headers=headers, timeout=10) if response.ok: return response.json() - else: - raise f"Error {response.json()}" + raise f"Error {response.json()}" @add_viadot_metadata_columns def to_df(self, if_empty: str = "warn") -> pd.DataFrame: - """Returns a pandas DataFrame with flattened data + """Returns a pandas DataFrame with flattened data. Returns: pandas.DataFrame: A Pandas DataFrame @@ -83,6 +83,6 @@ def to_df(self, if_empty: str = "warn") -> pd.DataFrame: ) return df - def query(self, api_url: str) -> bool: - self.api_url = api_url - return True + def query(self) -> None: + """Queries the API.""" + ... 
diff --git a/src/viadot/task_utils.py b/src/viadot/task_utils.py deleted file mode 100644 index 8741cdd51..000000000 --- a/src/viadot/task_utils.py +++ /dev/null @@ -1,456 +0,0 @@ -import copy -import json -import os -import shutil -from datetime import datetime, timedelta, timezone -from pathlib import Path -from typing import List, Literal, Union, cast - -import pandas as pd -import prefect -import pyarrow as pa -import pyarrow.dataset as ds -from prefect import Flow, Task, task -from prefect.engine.state import Failed -from prefect.tasks.secrets import PrefectSecret -from prefect.utilities import logging -from sendgrid import SendGridAPIClient -from sendgrid.helpers.mail import Mail -from toolz import curry -from visions.functional import infer_type -from visions.typesets.complete_set import CompleteSet - -from viadot.config import local_config -from viadot.tasks import AzureKeyVaultSecret - -logger = logging.get_logger() -METADATA_COLUMNS = {"_viadot_downloaded_at_utc": "DATETIME"} - - -@task -def add_ingestion_metadata_task( - df: pd.DataFrame, -): - """Add ingestion metadata columns, eg. data download date - - Args: - df (pd.DataFrame): input DataFrame. - """ - df2 = df.copy(deep=True) - df2["_viadot_downloaded_at_utc"] = datetime.now(timezone.utc).replace(microsecond=0) - return df2 - - -@task -def get_latest_timestamp_file_path(files: List[str]) -> str: - """ - Return the name of the latest file in a given data lake directory, - given a list of paths in that directory. Such list can be obtained using the - `AzureDataLakeList` task. This task is useful for working with immutable data lakes as - the data is often written in the format /path/table_name/TIMESTAMP.parquet. - """ - - logger = prefect.context.get("logger") - - file_names = [Path(file).stem for file in files] - latest_file_name = max(file_names, key=lambda d: datetime.fromisoformat(d)) - latest_file = files[file_names.index(latest_file_name)] - - logger.debug(f"Latest file: {latest_file}") - - return latest_file - - -@task -def dtypes_to_json_task(dtypes_dict, local_json_path: str): - """ - Creates json file from a dictionary. - Args: - dtypes_dict (dict): Dictionary containing data types. - local_json_path (str): Path to local json file. - """ - with open(local_json_path, "w") as fp: - json.dump(dtypes_dict, fp) - - -@task -def chunk_df(df: pd.DataFrame, size: int = 10_000) -> List[pd.DataFrame]: - """ - Creates pandas Dataframes list of chunks with a given size. - Args: - df (pd.DataFrame): Input pandas DataFrame. - size (int, optional): Size of a chunk. Defaults to 10000. - """ - n_rows = df.shape[0] - chunks = [df[i : i + size] for i in range(0, n_rows, size)] - return chunks - - -@task -def df_get_data_types_task(df: pd.DataFrame) -> dict: - """ - Returns dictionary containing datatypes of pandas DataFrame columns. - Args: - df (pd.DataFrame): Input pandas DataFrame. 
- """ - typeset = CompleteSet() - dtypes = infer_type(df, typeset) - dtypes_dict = {k: str(v) for k, v in dtypes.items()} - return dtypes_dict - - -@task -def get_sql_dtypes_from_df(df: pd.DataFrame) -> dict: - """Obtain SQL data types from a pandas DataFrame""" - typeset = CompleteSet() - dtypes = infer_type(df.head(10000), typeset) - dtypes_dict = {k: str(v) for k, v in dtypes.items()} - dict_mapping = { - "Float": "REAL", - "Image": None, - "Categorical": "VARCHAR(500)", - "Time": "TIME", - "Boolean": "VARCHAR(5)", # Bool is True/False, Microsoft expects 0/1 - "DateTime": "DATETIMEOFFSET", # DATETIMEOFFSET is the only timezone-aware dtype in TSQL - "Object": "VARCHAR(500)", - "EmailAddress": "VARCHAR(50)", - "File": None, - "Geometry": "GEOMETRY", - "Ordinal": "INT", - "Integer": "INT", - "Generic": "VARCHAR(500)", - "UUID": "VARCHAR(50)", # Microsoft uses a custom UUID format so we can't use it - "Complex": None, - "Date": "DATE", - "String": "VARCHAR(500)", - "IPAddress": "VARCHAR(39)", - "Path": "VARCHAR(255)", - "TimeDelta": "VARCHAR(20)", # datetime.datetime.timedelta; eg. '1 days 11:00:00' - "URL": "VARCHAR(255)", - "Count": "INT", - } - dict_dtypes_mapped = {} - for k in dtypes_dict: - dict_dtypes_mapped[k] = dict_mapping[dtypes_dict[k]] - - # This is required as pandas cannot handle mixed dtypes in Object columns - dtypes_dict_fixed = { - k: ("String" if v == "Object" else str(v)) - for k, v in dict_dtypes_mapped.items() - } - - return dtypes_dict_fixed - - -@task -def update_dict(d: dict, d_new: dict) -> dict: - d_copy = copy.deepcopy(d) - d_copy.update(d_new) - return d_copy - - -@task -def df_map_mixed_dtypes_for_parquet( - df: pd.DataFrame, dtypes_dict: dict -) -> pd.DataFrame: - """ - Pandas is not able to handle mixed dtypes in the column in to_parquet - Mapping 'object' visions dtype to 'string' dtype to allow Pandas to_parquet - - Args: - dict_dtypes_mapped (dict): Data types dictionary inferenced by Visions - df (pd.DataFrame): input DataFrame. - - Returns: - df_mapped (pd.DataFrame): Pandas DataFrame with mapped Data Types to workaround Pandas to_parquet bug connected with mixed dtypes in object:. - """ - df_mapped = df.copy() - for col, dtype in dtypes_dict.items(): - if dtype == "Object": - df_mapped[col] = df_mapped[col].astype("string") - return df_mapped - - -@task -def update_dtypes_dict(dtypes_dict: dict) -> dict: - """ - Task to update dtypes_dictionary that will be stored in the schema. It's required due to workaround Pandas to_parquet bug connected with mixed dtypes in object - - Args: - dtypes_dict (dict): Data types dictionary inferenced by Visions - - Returns: - dtypes_dict_updated (dict): Data types dictionary updated to follow Pandas requeirments in to_parquet functionality. - """ - dtypes_dict_updated = { - k: ("String" if v == "Object" else str(v)) for k, v in dtypes_dict.items() - } - - return dtypes_dict_updated - - -@task -def df_to_csv( - df: pd.DataFrame, - path: str, - sep="\t", - if_exists: Literal["append", "replace", "skip"] = "replace", - **kwargs, -) -> None: - """ - Task to create csv file based on pandas DataFrame. - Args: - df (pd.DataFrame): Input pandas DataFrame. - path (str): Path to output csv file. - sep (str, optional): The separator to use in the CSV. Defaults to "\t". - if_exists (Literal["append", "replace", "skip"], optional): What to do if the table exists. Defaults to "replace". 
- """ - - if if_exists == "append" and os.path.isfile(path): - csv_df = pd.read_csv(path, sep=sep) - out_df = pd.concat([csv_df, df]) - elif if_exists == "replace": - out_df = df - elif if_exists == "skip" and os.path.isfile(path): - logger.info("Skipped.") - return - else: - out_df = df - - # create directories if they don't exist - try: - if not os.path.isfile(path): - directory = os.path.dirname(path) - os.makedirs(directory, exist_ok=True) - except Exception: - pass - - out_df.to_csv(path, index=False, sep=sep) - - -@task -def df_to_parquet( - df: pd.DataFrame, - path: str, - if_exists: Literal["append", "replace", "skip"] = "replace", - **kwargs, -) -> None: - """ - Task to create parquet file based on pandas DataFrame. - Args: - df (pd.DataFrame): Input pandas DataFrame. - path (str): Path to output parquet file. - if_exists (Literal["append", "replace", "skip"], optional): What to do if the table exists. Defaults to "replace". - """ - if if_exists == "append" and os.path.isfile(path): - parquet_df = pd.read_parquet(path) - out_df = pd.concat([parquet_df, df]) - elif if_exists == "replace": - out_df = df - elif if_exists == "skip": - logger.info("Skipped.") - return - else: - out_df = df - - # create directories if they don't exist - try: - if not os.path.isfile(path): - directory = os.path.dirname(path) - os.makedirs(directory, exist_ok=True) - except Exception: - pass - - out_df.to_parquet(path, index=False, **kwargs) - - -@task -def union_dfs_task(dfs: List[pd.DataFrame]): - """ - Create one DataFrame from a list of pandas DataFrames. - Args: - dfs (List[pd.DataFrame]): List of pandas Dataframes to concat. In case of different size of DataFrames NaN values can appear. - """ - return pd.concat(dfs, ignore_index=True) - - -@task -def write_to_json(dict_, path): - """ - Creates json file from a dictionary. Log record informs about the writing file proccess. - Args: - dict_ (dict): Dictionary. - path (str): Path to local json file. - """ - logger = prefect.context.get("logger") - - if os.path.isfile(path): - logger.warning(f"File {path} already exists. Overwriting...") - else: - logger.debug(f"Writing to {path}...") - - # create parent directories if they don't exist - Path(path).parent.mkdir(parents=True, exist_ok=True) - with open(path, mode="w") as f: - json.dump(dict_, f) - - logger.debug(f"Successfully wrote to {path}.") - - -@task -def cleanup_validation_clutter(expectations_path): - ge_project_path = Path(expectations_path).parent - shutil.rmtree(ge_project_path) - - -@task -def df_converts_bytes_to_int(df: pd.DataFrame) -> pd.DataFrame: - logger = prefect.context.get("logger") - logger.info("Converting bytes in dataframe columns to list of integers") - return df.applymap(lambda x: list(map(int, x)) if isinstance(x, bytes) else x) # noqa - - -@task( - max_retries=3, - retry_delay=timedelta(seconds=10), -) -def df_to_dataset( - df: pd.DataFrame, partitioning_flavor="hive", format="parquet", **kwargs -) -> None: - """ - Use `pyarrow.dataset.write_to_dataset()` to write from a pandas DataFrame to a dataset. - This enables several data lake-specific optimizations such as parallel writes, partitioning, - and file size (via `max_rows_per_file` parameter). - - Args: - df (pd.DataFrame): The pandas DataFrame to write. - partitioning_flavor (str, optional): The partitioning flavor to use. Defaults to "hive". - format (str, optional): The dataset format. Defaults to 'parquet'. - kwargs: Keyword arguments to be passed to `write_to_dataset()`. 
See - https://arrow.apache.org/docs/python/generated/pyarrow.dataset.write_dataset.html. - - Examples: - table = pa.Table.from_pandas(df_contact) - base_dir = "/home/viadot/contact" - partition_cols = ["updated_at_year", "updated_at_month", "updated_at_day"] - - df_to_dataset( - data=table, - base_dir=base_dir, - partitioning=partition_cols, - existing_data_behavior='overwrite_or_ignore', - max_rows_per_file=100_000 - ) - """ - table = pa.Table.from_pandas(df) - ds.write_dataset( - data=table, partitioning_flavor=partitioning_flavor, format=format, **kwargs - ) - - -@curry -def custom_mail_state_handler( - tracked_obj: Union["Flow", "Task"], - old_state: prefect.engine.state.State, - new_state: prefect.engine.state.State, - only_states: list = [Failed], - local_api_key: str = None, - credentials_secret: str = None, - vault_name: str = None, - from_email: str = None, - to_emails: str = None, -) -> prefect.engine.state.State: - """ - Custom state handler configured to work with sendgrid. - Works as a standalone state handler, or can be called from within a custom state handler. - Args: - tracked_obj (Task or Flow): Task or Flow object the handler is registered with. - old_state (State): previous state of tracked object. - new_state (State): new state of tracked object. - only_states ([State], optional): similar to `ignore_states`, but instead _only_ - notifies you if the Task / Flow is in a state from the provided list of `State` - classes. - local_api_key (str, optional): Api key from local config. - credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with API KEY. - vault_name (str, optional): Name of key vault. - from_email (str): Sender mailbox address. - to_emails (str): Receiver mailbox address. - Returns: State: the `new_state` object that was provided - - """ - - if credentials_secret is None: - try: - credentials_secret = PrefectSecret("mail_notifier_api_key").run() - except ValueError: - pass - - if credentials_secret is not None: - credentials_str = AzureKeyVaultSecret( - credentials_secret, vault_name=vault_name - ).run() - api_key = json.loads(credentials_str).get("API_KEY") - elif local_api_key is not None: - api_key = local_config.get(local_api_key).get("API_KEY") - else: - raise Exception("Please provide API KEY") - - curr_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - only_states = only_states or [] - if only_states and not any( - [isinstance(new_state, included) for included in only_states] - ): - return new_state - url = prefect.client.Client().get_cloud_url( - "flow-run", prefect.context["flow_run_id"], as_user=False - ) - message = Mail( - from_email=from_email, - to_emails=to_emails, - subject=f"The flow {tracked_obj.name} - Status {new_state}", - html_content=f"The flow {cast(str,tracked_obj.name)} FAILED at {curr_dt}. \ -

More details here: {url}
", - ) - try: - sendgrid = SendGridAPIClient(api_key) - sendgrid.send(message) - except Exception as e: - raise e - - return new_state - - -@task -def df_clean_column( - df: pd.DataFrame, columns_to_clean: List[str] = None -) -> pd.DataFrame: - """ - Function that removes special characters (such as escape symbols) - from a pandas DataFrame. - - Args: - df (pd.DataFrame): The DataFrame to clean. - columns_to_clean (List[str]): A list of columns to clean. Defaults is None. - - Returns: - pd.DataFrame: The cleaned DataFrame - """ - - df = df.copy() - - if columns_to_clean is None: - df.replace( - to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], - value=["", ""], - regex=True, - inplace=True, - ) - else: - for col in columns_to_clean: - df[col].replace( - to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], - value=["", ""], - regex=True, - inplace=True, - ) - return df diff --git a/src/viadot/utils.py b/src/viadot/utils.py index 5ef1323cf..957dc23fd 100644 --- a/src/viadot/utils.py +++ b/src/viadot/utils.py @@ -1,15 +1,18 @@ +"""Util functions.""" + +from collections.abc import Callable +import contextlib +from datetime import datetime, timezone import functools import logging import re import subprocess -from datetime import datetime, timezone -from typing import Any, Callable, Dict, List, Literal, Optional +from typing import TYPE_CHECKING, Any, Literal, Optional import pandas as pd import pyodbc +import pytest import requests - -# from prefect.utilities.graphql import EnumValue, with_args from requests.adapters import HTTPAdapter from requests.exceptions import ConnectionError, HTTPError, ReadTimeout, Timeout from requests.packages.urllib3.util.retry import Retry @@ -18,40 +21,43 @@ from viadot.exceptions import APIError, ValidationError from viadot.signals import SKIP -try: + +if TYPE_CHECKING: + from viadot.sources.base import Source + + +with contextlib.suppress(ImportError): import pyspark.sql.dataframe as spark -except ImportError: - pass def slugify(name: str) -> str: + """Slugify a string.""" return name.replace(" ", "_").lower() def handle_api_request( url: str, - auth: tuple = None, - params: Dict[str, Any] = None, - headers: Dict[str, Any] = None, + auth: tuple | None = None, + params: dict[str, Any] | None = None, + headers: dict[str, Any] | None = None, timeout: tuple = (3.05, 60 * 30), method: Literal["GET", "POST", "DELETE"] = "GET", - data: str = None, + data: str | None = None, ) -> requests.Response: - """ - Send an HTTP request to the specified URL using the provided parameters. + """Send an HTTP request to the specified URL using the provided parameters. Args: url (str): The URL to send the request to. - auth (tuple, optional): A tuple of (username, password) for basic authentication. - Defaults to None. + auth (tuple, optional): A tuple of (username, password) for basic + authentication. Defaults to None. params (Dict[str, Any], optional): A dictionary of query string parameters. Defaults to None. - headers (Dict[str, Any], optional): A dictionary of HTTP headers to include with the request. - Defaults to None. - timeout (tuple, optional): A tuple of (connect_timeout, read_timeout) in seconds. - Defaults to (3.05, 60 * 30). - method (Literal["GET", "POST", "DELETE"], optional): The HTTP method to use for the request. - Defaults to "GET". + headers (Dict[str, Any], optional): A dictionary of HTTP headers to include with + the request. Defaults to None. + timeout (tuple, optional): A tuple of (connect_timeout, read_timeout) in + seconds. Defaults to (3.05, 60 * 30). 
+ method (Literal["GET", "POST", "DELETE"], optional): The HTTP method to use for + the request. Defaults to "GET". data (str, optional): The request body data as a string. Defaults to None. Returns: @@ -69,7 +75,7 @@ def handle_api_request( session.mount("http://", adapter) session.mount("https://", adapter) - response = session.request( + return session.request( method=method, url=url, auth=auth, @@ -79,10 +85,8 @@ def handle_api_request( data=data, ) - return response - -def handle_response( +def _handle_response( response: requests.Response, timeout: tuple = (3.05, 60 * 30) ) -> requests.Response: url = response.url @@ -94,43 +98,45 @@ def handle_response( msg += "while waiting for the server to return data." raise APIError(msg) from e except HTTPError as e: - raise APIError( - f"The API call to {url} failed. " - "Perhaps your account credentials need to be refreshed?", - ) from e + msg = f"The API call to {url} failed with status code {response.status_code}." + msg += "\nPerhaps your account credentials need to be refreshed?" + raise APIError(msg) from e except (ConnectionError, Timeout) as e: - raise APIError(f"The API call to {url} failed due to connection issues.") from e - except ProtocolError: - raise APIError(f"Did not receive any response for the API call to {url}.") + msg = f"The API call to {url} failed due to connection issues." + raise APIError(msg) from e + except ProtocolError as e: + msg = f"Did not receive any response for the API call to {url}." + raise APIError(msg) from e except Exception as e: - raise APIError("Unknown error.") from e + msg = "Unknown error." + raise APIError(msg) from e return response def handle_api_response( url: str, - auth: tuple = None, - params: Dict[str, Any] = None, - headers: Dict[str, Any] = None, + auth: tuple | None = None, + params: dict[str, Any] | None = None, + headers: dict[str, Any] | None = None, timeout: tuple = (3.05, 60 * 30), method: Literal["GET", "POST", "DELETE"] = "GET", - data: str = None, + data: str | None = None, ) -> requests.models.Response: - """ - Handle an HTTP response by applying retries and handling some common response - codes. + """Handle an HTTP response. + + Apply retries and handle some common response codes. Args: url (str): The URL which trying to connect. - auth (tuple, optional): A tuple of (username, password) for basic authentication. - Defaults to None. + auth (tuple, optional): A tuple of (username, password) for basic + authentication. Defaults to None. params (Dict[str, Any], optional): The request parameters. Defaults to None. headers (Dict[str, Any], optional): The request headers. Defaults to None. - method (Literal["GET", "POST", "DELETE"], optional): The HTTP method to use for the request. - Defaults to "GET". - timeout (tuple, optional): A tuple of (connect_timeout, read_timeout) in seconds. - Defaults to (3.05, 60 * 30). + method (Literal["GET", "POST", "DELETE"], optional): The HTTP method to use for + the request. Defaults to "GET". + timeout (tuple, optional): A tuple of (connect_timeout, read_timeout) in + seconds. Defaults to (3.05, 60 * 30). data (str, optional): The request body data as a string. Defaults to None. 
Raises: @@ -152,15 +158,13 @@ def handle_api_response( data=data, ) - response_handled = handle_response(response) - return response_handled + return _handle_response(response) def get_sql_server_table_dtypes( - table: str, con: pyodbc.Connection, schema: str = None + table: str, con: pyodbc.Connection, schema: str | None = None ) -> dict: - """ - Get column names and types from a SQL Server database table. + """Get column names and types from a SQL Server database table. Args: table (str): The table for which to fetch dtypes. @@ -172,7 +176,6 @@ def get_sql_server_table_dtypes( Returns: dict: A dictionary of the form {column_name: dtype, ...}. """ - query = f""" SELECT col.name, @@ -206,23 +209,21 @@ def get_sql_server_table_dtypes( def _cast_df_cols( df: pd.DataFrame, - types_to_convert: List[Literal["datetime", "bool", "int", "object"]] = [ - "datetime", - "bool", - "int", - ], + types_to_convert: list[Literal["datetime", "bool", "int", "object"]] | None = None, ) -> pd.DataFrame: - """ - Cast the data types of columns in a DataFrame. + """Cast the data types of columns in a DataFrame. Args: df (pd.DataFrame): The input DataFrame. - types_to_convert (Literal[datetime, bool, int, object], optional): List of types to be converted. - Defaults to ["datetime", "bool", "int"]. + types_to_convert (Literal[datetime, bool, int, object], optional): List of types + to be converted. Defaults to ["datetime", "bool", "int"]. Returns: pd.DataFrame: A DataFrame with modified data types. """ + if not types_to_convert: + types_to_convert = ["datetime", "bool", "int"] + df = df.replace({"False": False, "True": True}) datetime_cols = (col for col, dtype in df.dtypes.items() if dtype.kind == "M") @@ -249,14 +250,15 @@ def _cast_df_cols( def build_merge_query( table: str, primary_key: str, - source, - stg_schema: str = None, + source: "Source", + stg_schema: str | None = None, stg_table: str = "stg", - schema: str = None, + schema: str | None = None, df: Optional["spark.DataFrame"] = None, ) -> str: - """ - Build a merge query for the simplest possible upsert scenario: + """Build a merge query for the simplest possible upsert scenario. 
+ + Used for: - updating and inserting all fields - merging on a single column, which has the same name in both tables @@ -283,13 +285,13 @@ def build_merge_query( # Get column names columns_query_result = _get_table_columns(schema=schema, table=table, source=source) - columns = [tup for tup in columns_query_result] + columns = list(columns_query_result) columns_stg_fqn = [f"{stg_table}.{col}" for col in columns] # Build merge query update_pairs = [f"existing.{col} = {stg_table}.{col}" for col in columns] - merge_query = f""" + return f""" MERGE INTO {fqn} existing USING {stg_fqn} {stg_table} ON {stg_table}.{primary_key} = existing.{primary_key} @@ -299,10 +301,9 @@ def build_merge_query( THEN INSERT({", ".join(columns)}) VALUES({", ".join(columns_stg_fqn)}); """ - return merge_query -def _get_table_columns(schema: str, table: str, source) -> str: +def _get_table_columns(schema: str, table: str, source: "Source") -> str: if source.__class__.__name__ == "Databricks": result = source.run(f"SHOW COLUMNS IN {schema}.{table}", "pandas") columns_query_result = result["col_name"].values @@ -325,10 +326,9 @@ def _get_table_columns(schema: str, table: str, source) -> str: def gen_bulk_insert_query_from_df( - df: pd.DataFrame, table_fqn: str, chunksize=1000, **kwargs + df: pd.DataFrame, table_fqn: str, chunksize: int = 1000, **kwargs ) -> str: - """ - Converts a DataFrame to a bulk INSERT query. + """Converts a DataFrame to a bulk INSERT query. Args: df (pd.DataFrame): The DataFrame which data should be put into the INSERT query. @@ -346,7 +346,9 @@ def gen_bulk_insert_query_from_df( 0 1 _suffixnan 1 NaN 1 2 Noneprefix 0 NaN 2 3 fooNULLbar 1 2.34 - >>> query = gen_bulk_insert_query_from_df(df, "users", status="APPROVED", address=None) + >>> query = gen_bulk_insert_query_from_df( + >>> df, table_fqn="users", status="APPROVED", address=None + >>> ) >>> print(query) INSERT INTO users (id, name, is_deleted, balance, status, address) VALUES (1, '_suffixnan', 1, NULL, 'APPROVED', NULL), @@ -354,11 +356,10 @@ def gen_bulk_insert_query_from_df( (3, 'fooNULLbar', 1, 2.34, 'APPROVED', NULL); """ if df.shape[1] == 1: - raise NotImplementedError( - "Currently, this function only handles DataFrames with at least two columns." - ) + msg = "Currently, this function only handles DataFrames with at least two columns." + raise NotImplementedError(msg) - def _gen_insert_query_from_records(records: List[tuple]) -> str: + def _gen_insert_query_from_records(records: list[tuple]) -> str: tuples = map(str, tuple(records)) # Change Nones to NULLs @@ -406,25 +407,27 @@ def _gen_insert_query_from_records(records: List[tuple]) -> str: chunk_insert_query = _gen_insert_query_from_records(chunk) insert_query += chunk_insert_query + ";\n\n" return insert_query - else: - return _gen_insert_query_from_records(tuples_escaped) + return _gen_insert_query_from_records(tuples_escaped) def handle_if_empty( if_empty: Literal["warn", "skip", "fail"] = "warn", - message: str = None, - logger: logging.Logger = logging.getLogger(__name__), -): - """ - Task for handling empty file. + message: str | None = None, + logger: logging.Logger | None = None, +) -> None: + """Task for handling empty file. + Args: if_empty (Literal, optional): What to do if file is empty. Defaults to "warn". message (str, optional): Massage to show in warning and error messages. Defaults to None. + Raises: ValueError: If `if_empty` is set to `fail`. SKIP: If `if_empty` is set to `skip`. 
""" + if not logger: + logger = logging.getLogger(__name__) if if_empty == "warn": logger.warning(message) elif if_empty == "skip": @@ -434,8 +437,7 @@ def handle_if_empty( def cleanup_df(df: pd.DataFrame) -> pd.DataFrame: - """ - Remove some common data corruption from a pandas DataFrame. + """Remove some common data corruption from a pandas DataFrame. Args: df (pd.DataFrame): The pandas DataFrame to be cleaned up. @@ -446,16 +448,27 @@ def cleanup_df(df: pd.DataFrame) -> pd.DataFrame: return df.replace(r"\n|\t", "", regex=True) -def call_shell(command): +def call_shell(command: str) -> str: + """Run a shell command and return the output.""" try: - result = subprocess.check_output(command, shell=True) + result = subprocess.check_output(command, shell=True) # noqa: S602 except subprocess.CalledProcessError as e: - # TODO: read the error message fro mstdout and pass here - raise ValueError("Generating the file failed.") from e - return result + # TODO: read the error message from stdout and pass here + msg = "Generating the file failed." + raise ValueError(msg) from e + else: + return result def df_snakecase_column_names(df: pd.DataFrame) -> pd.DataFrame: + """Snakecase the column names of a DataFrame. + + Args: + df (pd.DataFrame): The DataFrame to snakecase. + + Returns: + pd.DataFrame: The DataFrame with snakecased column names. + """ df.columns = ( df.columns.str.strip().str.replace(" ", "_").str.replace("-", "_").str.lower() ) @@ -463,7 +476,10 @@ def df_snakecase_column_names(df: pd.DataFrame) -> pd.DataFrame: def add_viadot_metadata_columns(func: Callable) -> Callable: - "Decorator that adds metadata columns to df in 'to_df' method" + """A decorator for the 'to_df()' method. + + Adds viadot metadata columns to the returned DataFrame. + """ @functools.wraps(func) def wrapper(*args, **kwargs) -> pd.DataFrame: @@ -481,13 +497,31 @@ def wrapper(*args, **kwargs) -> pd.DataFrame: return wrapper -def get_fqn(table_name: str, schema_name: str = None) -> str: +def get_fqn(table_name: str, schema_name: str | None = None) -> str: + """Get the fully qualified name of a table.""" return f"{schema_name}.{table_name}" if schema_name else table_name def validate_column_size( - df, tests, logger, stream_level, failed_tests, failed_tests_list -): + df: pd.DataFrame, + tests: dict[str, Any], + logger: logging.Logger, + stream_level: int, + failed_tests: int, + failed_tests_list: list, +) -> None: + """Validate the size of the columns in the DataFrame. + + Logic: TODO + + Args: + df (pd.DataFrame): The pandas DataFrame to validate. + tests (dict[str, Any]): _description_ + logger (logging.Logger): The logger to use. + stream_level (int): The logging level to use for logging. + failed_tests (int): _description_ + failed_tests_list (list): _description_ + """ try: for k, v in tests["column_size"].items(): column_max_length = ( @@ -513,8 +547,23 @@ def validate_column_size( def validate_column_unique_values( - df, tests, logger, stream_level, failed_tests, failed_tests_list -): + df: pd.DataFrame, + tests: dict[str, Any], + logger: logging.Logger, + stream_level: int, + failed_tests: int, + failed_tests_list: list, +) -> None: + """Validate whether a DataFrame column only contains unique values. + + Args: + df (pd.DataFrame): The pandas DataFrame to validate. + tests (dict[str, Any]): _description_ + logger (logging.Logger): The logger to use. + stream_level (int): The logging level to use for logging. 
+ failed_tests (int): _description_ + failed_tests_list (list): _description_ + """ for column in tests["column_unique_values"]: df_size = df.shape[0] if df[column].nunique() == df_size: @@ -532,8 +581,23 @@ def validate_column_unique_values( def validate_column_list_to_match( - df, tests, logger, stream_level, failed_tests, failed_tests_list -): + df: pd.DataFrame, + tests: dict[str, Any], + logger: logging.Logger, + stream_level: int, + failed_tests: int, + failed_tests_list: list, +) -> None: + """Validate whether the columns of the DataFrame match the expected list. + + Args: + df (pd.DataFrame): The pandas DataFrame to validate. + tests (dict[str, Any]): _description_ + logger (logging.Logger): The logger to use. + stream_level (int): The logging level to use for logging. + failed_tests (int): _description_ + failed_tests_list (list): _description_ + """ if set(tests["column_list_to_match"]) == set(df.columns): logger.log(level=stream_level, msg=f"{tests['column_list_to_match']} passed.") else: @@ -546,8 +610,23 @@ def validate_column_list_to_match( def validate_dataset_row_count( - df, tests, logger, stream_level, failed_tests, failed_tests_list -): + df: pd.DataFrame, + tests: dict[str, Any], + logger: logging.Logger, + stream_level: int, + failed_tests: int, + failed_tests_list: list, +) -> None: + """Validate the DataFrame row count. + + Args: + df (pd.DataFrame): The pandas DataFrame to validate. + tests (dict[str, Any]): _description_ + logger (logging.Logger): The logger to use. + stream_level (int): The logging level to use for logging. + failed_tests (int): _description_ + failed_tests_list (list): _description_ + """ row_count = len(df.iloc[:, 0]) max_value = tests["dataset_row_count"]["max"] or 100_000_000 min_value = tests["dataset_row_count"]["min"] or 0 @@ -564,11 +643,28 @@ def validate_dataset_row_count( def validate_column_match_regex( - df, tests, logger, stream_level, failed_tests, failed_tests_list -): + df: pd.DataFrame, + tests: dict[str, Any], + logger: logging.Logger, + stream_level: int, + failed_tests: int, + failed_tests_list: list, +) -> None: + """Validate whether the values of a column match a regex pattern. + + Logic: TODO + + Args: + df (pd.DataFrame): The pandas DataFrame to validate. + tests (dict[str, Any]): _description_ + logger (logging.Logger): The logger to use. + stream_level (int): The logging level to use for logging. + failed_tests (int): _description_ + failed_tests_list (list): _description_ + """ for k, v in tests["column_match_regex"].items(): try: - matches = df[k].apply(lambda x: bool(re.match(v, str(x)))) + matches = df[k].apply(lambda x: bool(re.match(v, str(x)))) # noqa: B023 if all(matches): logger.log( level=stream_level, @@ -591,8 +687,23 @@ def validate_column_match_regex( def validate_column_sum( - df, tests, logger, stream_level, failed_tests, failed_tests_list -): + df: pd.DataFrame, + tests: dict[str, Any], + logger: logging.Logger, + stream_level: int, + failed_tests: int, + failed_tests_list: list, +) -> None: + """Validate the sum of a column in the DataFrame. + + Args: + df (pd.DataFrame): The pandas DataFrame to validate. + tests (dict[str, Any]): _description_ + logger (logging.Logger): The logger to use. + stream_level (int): The logging level to use for logging. 
+ failed_tests (int): _description_ + failed_tests_list (list): _description_ + """ for column, bounds in tests["column_sum"].items(): col_sum = df[column].sum() min_bound = bounds["min"] @@ -611,30 +722,29 @@ def validate_column_sum( ) -##TO DO -# Create class DataFrameTests(BaseModel) def validate( df: pd.DataFrame, - tests: dict = None, + tests: dict | None = None, stream_level: int = logging.INFO, - logger: Optional[logging.Logger] = None, + logger: logging.Logger | None = None, ) -> None: - """ - Task to validate the data on DataFrame level. All numbers in the ranges are inclusive. - tests: + """Validate data. All numbers in the ranges are inclusive. + + Available tests: - `column_size`: dict{column: size} - `column_unique_values`: list[columns] - `column_list_to_match`: list[columns] - `dataset_row_count`: dict: {'min': number, 'max', number} - `column_match_regex`: dict: {column: 'regex'} - `column_sum`: dict: {column: {'min': number, 'max': number}} + Args: df (pd.DataFrame): The dataframe to validate. tests (dict, optional): Tests to apply on the data frame. Defaults to None. + Raises: ValidationError: If validation failed for at least one test. """ - if logger is None: logging.basicConfig(level=logging.INFO) logger = logging.getLogger("prefect_shell.utils") @@ -672,11 +782,55 @@ def validate( validate_column_sum( df, tests, logger, stream_level, failed_tests, failed_tests_list ) - else: - return "No dataframe tests to run." if failed_tests > 0: failed_tests_msg = ", ".join(failed_tests_list) - raise ValidationError( - f"Validation failed for {failed_tests} test(s): {failed_tests_msg}" - ) + msg = f"Validation failed for {failed_tests} test(s): {failed_tests_msg}" + raise ValidationError(msg) + + +def validate_and_reorder_dfs_columns( + dataframes_list: list[pd.DataFrame], +) -> list[pd.DataFrame]: + """Validate if dataframes from the list have the same column structure. + + Reorder columns to match the first DataFrame if necessary. + + Args: + dataframes_list (list[pd.DataFrame]): List containing DataFrames. + + Raises: + IndexError: If the list of DataFrames is empty. + ValueError: If DataFrames have different column structures. + """ + if not dataframes_list: + message = "The list of dataframes is empty." + raise IndexError(message) + + first_df_columns = dataframes_list[0].columns + + # Check that all DataFrames have the same columns + for i, df in enumerate(dataframes_list): + if set(df.columns) != set(first_df_columns): + message = f"""DataFrame at index {i} does not have the same structure as + the first DataFrame.""" + raise ValueError(message) + if not df.columns.equals(first_df_columns): + # Reorder columns for DataFrame at index 'i' to match the first DataFrame. + dataframes_list[i] = df.loc[:, first_df_columns] + + return dataframes_list + + +def skip_test_on_missing_extra(source_name: str, extra: str) -> None: + """Skip all tests in a file when a required extra is not installed. + + Args: + source_name (str): The name of the source for which dependencies are missing. + extra (str): The name of the extra that is missing. + """ + msg = f"Missing required extra '{extra}' for source '{source_name}'." + pytest.skip( + msg, + allow_module_level=True, + ) diff --git a/tests/.env.example b/tests/.env.example new file mode 100644 index 000000000..ec8179361 --- /dev/null +++ b/tests/.env.example @@ -0,0 +1,35 @@ +# File paths. +TEST_FILE_PATH= +TEST_ADLS_FILE_PATH_PARQUET= +TEST_ADLS_FILE_PATH_CSV= + +# Sensitive variables. 
+AZURE_ORG_NAME= +AZURE_PROJECT_NAME= +AZURE_REPO_NAME= +AZURE_REPO_URL= +DBT_REPO_URL= +C4C_API_URL= +VIADOT_SHAREPOINT_URL= +VIADOT_S3_BUCKET= +LUMA_URL=http://localhost:8000 + +# Credential secrets. +VIADOT_TEST_SHAREPOINT_CREDENTIALS_SECRET=sharepointsecret +VIADOT_TEST_ADLS_CREDENTIALS_SECRET=sp-adls-test +VIADOT_TEST_DATABRICKS_CREDENTIALS_SECRET=databricks-qa-elt +VIADOT_TEST_C4C_CREDENTIALS_SECRET=aia-c4c-qa + +# Config keys. +VIADOT_TEST_EXCHANGE_RATES_CONFIG_KEY=exchange_rates_dev +VIADOT_TEST_REDSHIFT_CONFIG_KEY=redshift_dev +VIADOT_TEST_S3_CONFIG_KEY=s3_dev +VIADOT_TEST_DATABRICKS_CONFIG_KEY=databricks_dev +VIADOT_TEST_SHAREPOINT_CONFIG_KEY=sharepoint_dev +VIADOT_TEST_TRINO_CONFIG_KEY=trino_dev +VIADOT_TEST_MINIO_CONFIG_KEY=minio_dev +VIADOT_TEST_C4C_CONFIG_KEY=c4c_dev +VIADOT_TEST_AWS_CONFIG_KEY=s3_dev +VIADOT_TEST_OUTLOOK_CONFIG_KEY=outlook_dev +VIADOT_TEST_MINDFUL_CONFIG_KEY=mindful_dev +VIADOT_TEST_HUBSPOT_CONFIG_KEY=hubspot_dev diff --git a/tests/.env.template b/tests/.env.template deleted file mode 100644 index 5b43cee57..000000000 --- a/tests/.env.template +++ /dev/null @@ -1,12 +0,0 @@ -C4C_API_URL= -VIADOT_SHAREPOINT_URL= -TEST_ADLS_FILE_PATH_PARQUET= -TEST_ADLS_FILE_PATH_CSV= -VIADOT_S3_BUCKET= -VIADOT_REDSHIFT_CONFIG_KEY= -VIADOT_S3_CONFIG_KEY= -VIADOT_DATABRICKS_CONFIG_KEY= -VIADOT_SHAREPOINT_CONFIG_KEY= -VIADOT_TRINO_CONFIG_KEY= -VIADOT_MINIO_CONFIG_KEY= -VIADOT_C4C_CONFIG_KEY= diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/conftest.py b/tests/conftest.py index c6c62bea9..c43ea92e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,10 @@ import os +from pathlib import Path +from dotenv import load_dotenv import pandas as pd import pytest -from dotenv import load_dotenv load_dotenv() @@ -35,31 +36,30 @@ def TEST_CSV_FILE_BLOB_PATH(): @pytest.fixture(scope="session", autouse=True) def DF(): - df = pd.DataFrame.from_dict( + return pd.DataFrame.from_dict( data={"country": ["italy", "germany", "spain"], "sales": [100, 50, 80]} ) - return df @pytest.fixture(scope="session", autouse=True) -def create_test_csv_file(DF, TEST_CSV_FILE_PATH): +def _create_test_csv_file(DF, TEST_CSV_FILE_PATH): DF.to_csv(TEST_CSV_FILE_PATH, index=False, sep="\t") yield - os.remove(TEST_CSV_FILE_PATH) + Path(TEST_CSV_FILE_PATH).unlink() @pytest.fixture(scope="session", autouse=True) -def create_test_parquet_file(DF, TEST_PARQUET_FILE_PATH): +def _create_test_parquet_file(DF, TEST_PARQUET_FILE_PATH): DF.to_parquet(TEST_PARQUET_FILE_PATH, index=False) yield - os.remove(TEST_PARQUET_FILE_PATH) + Path(TEST_PARQUET_FILE_PATH).unlink() @pytest.fixture(scope="session", autouse=True) -def create_test_parquet_file_2(DF, TEST_PARQUET_FILE_PATH_2): +def _create_test_parquet_file_2(DF, TEST_PARQUET_FILE_PATH_2): DF.to_parquet(TEST_PARQUET_FILE_PATH_2, index=False) yield - os.remove(TEST_PARQUET_FILE_PATH_2) + Path(TEST_PARQUET_FILE_PATH_2).unlink() @pytest.fixture(scope="session", autouse=True) @@ -79,34 +79,124 @@ def TEST_ADLS_FILE_PATH_CSV(): @pytest.fixture(scope="session", autouse=True) def redshift_config_key(): - return os.environ.get("VIADOT_REDSHIFT_CONFIG_KEY") + return os.environ.get("VIADOT_TEST_REDSHIFT_CONFIG_KEY") @pytest.fixture(scope="session", autouse=True) def s3_config_key(): - return os.environ.get("VIADOT_S3_CONFIG_KEY") + return os.environ.get("VIADOT_TEST_S3_CONFIG_KEY") @pytest.fixture(scope="session", autouse=True) def sharepoint_config_key(): - return 
os.environ.get("VIADOT_SHAREPOINT_CONFIG_KEY") + return os.environ.get("VIADOT_TEST_SHAREPOINT_CONFIG_KEY") @pytest.fixture(scope="session", autouse=True) def databricks_config_key(): - return os.environ.get("VIADOT_DATABRICKS_CONFIG_KEY") + return os.environ.get("VIADOT_TEST_DATABRICKS_CONFIG_KEY") @pytest.fixture(scope="session", autouse=True) def c4c_config_key(): - return os.environ.get("VIADOT_C4C_CONFIG_KEY") + return os.environ.get("VIADOT_TEST_C4C_CONFIG_KEY") @pytest.fixture(scope="session", autouse=True) def trino_config_key(): - return os.environ.get("VIADOT_TRINO_CONFIG_KEY") + return os.environ.get("VIADOT_TEST_TRINO_CONFIG_KEY") @pytest.fixture(scope="session", autouse=True) def minio_config_key(): - return os.environ.get("VIADOT_MINIO_CONFIG_KEY") + return os.environ.get("VIADOT_TEST_MINIO_CONFIG_KEY") + + +# Prefect + + +@pytest.fixture(scope="session", autouse=True) +def TEST_FILE_PATH(): + return os.environ.get("TEST_FILE_PATH") + + +@pytest.fixture(scope="session", autouse=True) +def TEST_DF(): + return pd.DataFrame.from_dict( + data={"country": ["italy", "germany", "spain"], "sales": [100, 50, 80]} + ) + + +@pytest.fixture(scope="session", autouse=True) +def AZURE_ORG_NAME(): + return os.environ.get("AZURE_ORG_NAME") + + +@pytest.fixture(scope="session", autouse=True) +def AZURE_PROJECT_NAME(): + return os.environ.get("AZURE_PROJECT_NAME") + + +@pytest.fixture(scope="session", autouse=True) +def AZURE_REPO_NAME(): + return os.environ.get("AZURE_REPO_NAME") + + +@pytest.fixture(scope="session", autouse=True) +def AZURE_REPO_URL(): + return os.environ.get("AZURE_REPO_URL") + + +@pytest.fixture(scope="session", autouse=True) +def sharepoint_credentials_secret(): + return os.environ.get("VIADOT_TEST_SHAREPOINT_CREDENTIALS_SECRET") + + +@pytest.fixture(scope="session", autouse=True) +def aws_config_key(): + return os.environ.get("VIADOT_TEST_AWS_CONFIG_KEY") + + +@pytest.fixture(scope="session", autouse=True) +def outlook_config_key(): + return os.environ.get("VIADOT_TEST_OUTLOOK_CONFIG_KEY") + + +@pytest.fixture(scope="session", autouse=True) +def mindful_config_key(): + return os.environ.get("VIADOT_TEST_MINDFUL_CONFIG_KEY") + + +@pytest.fixture(scope="session", autouse=True) +def hubspot_config_key(): + return os.environ.get("VIADOT_TEST_HUBSPOT_CONFIG_KEY") + + +@pytest.fixture(scope="session", autouse=True) +def LUMA_URL(): + return os.environ.get("LUMA_URL") + + +@pytest.fixture(scope="session", autouse=True) +def dbt_repo_url(): + return os.environ.get("DBT_REPO_URL") + + +@pytest.fixture(scope="session", autouse=True) +def exchange_rates_config_key(): + return os.environ.get("VIADOT_TEST_EXCHANGE_RATES_CONFIG_KEY") + + +@pytest.fixture(scope="session", autouse=True) +def adls_credentials_secret(): + return os.environ.get("VIADOT_TEST_ADLS_CREDENTIALS_SECRET") + + +@pytest.fixture(scope="session", autouse=True) +def databricks_credentials_secret(): + return os.environ.get("VIADOT_TEST_DATABRICKS_CREDENTIALS_SECRET") + + +@pytest.fixture(scope="session", autouse=True) +def c4c_credentials_secret(): + return os.environ.get("VIADOT_TEST_C4C_CREDENTIALS_SECRET") diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index e69de29bb..c210facce 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -0,0 +1 @@ +"""Integration tests.""" diff --git a/tests/integration/orchestration/prefect/flows/test_cloud_for_customers.py b/tests/integration/orchestration/prefect/flows/test_cloud_for_customers.py new file mode 100644 index 
000000000..f5ed9a7ab --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_cloud_for_customers.py @@ -0,0 +1,53 @@ +from viadot.orchestration.prefect.flows import ( + cloud_for_customers_to_adls, + cloud_for_customers_to_databricks, +) +from viadot.sources import AzureDataLake, Databricks + + +TEST_SCHEMA = "test_viadot_schema" +TEST_TABLE = "test" + + +def test_cloud_for_customers_to_adls( + cloud_for_customers_url, + TEST_FILE_PATH, + c4c_credentials_secret, + adls_credentials_secret, +): + lake = AzureDataLake(config_key="adls_test") + + assert not lake.exists(TEST_FILE_PATH) + + cloud_for_customers_to_adls( + cloud_for_customers_url=cloud_for_customers_url, + adls_path=TEST_FILE_PATH, + cloud_for_customers_credentials_secret=c4c_credentials_secret, + adls_credentials_secret=adls_credentials_secret, + ) + + assert lake.exists(TEST_FILE_PATH) + + lake.rm(TEST_FILE_PATH) + + +def test_cloud_for_customers_to_databricks( + cloud_for_customers_url, c4c_credentials_secret, databricks_credentials_secret +): + databricks = Databricks(config_key="databricks-qa-elt") + + assert not databricks._check_if_table_exists(schema=TEST_SCHEMA, table=TEST_TABLE) + + cloud_for_customers_to_databricks( + cloud_for_customers_url=cloud_for_customers_url, + databricks_table=TEST_TABLE, + databricks_schema=TEST_SCHEMA, + cloud_for_customers_credentials_secret=c4c_credentials_secret, + databricks_credentials_secret=databricks_credentials_secret, + ) + + assert databricks._check_if_table_exists(schema=TEST_SCHEMA, table=TEST_TABLE) + + databricks.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) + + databricks.session.stop() diff --git a/tests/integration/orchestration/prefect/flows/test_duckdb_to_parquet.py b/tests/integration/orchestration/prefect/flows/test_duckdb_to_parquet.py new file mode 100644 index 000000000..ecbd36fbf --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_duckdb_to_parquet.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from viadot.orchestration.prefect.flows import duckdb_to_parquet +from viadot.sources import DuckDB + + +DUCKDB_CREDS = {"database": "test.duckdb", "read_only": False} +PATH = "test_parquet.parquet" + + +def test_duckdb_to_parquet(): + assert not Path(PATH).exists() + + duckdb = DuckDB(credentials=DUCKDB_CREDS) + duckdb.run_query( + query=""" +--CREATE SCHEMA sandbox; +CREATE or replace TABLE sandbox.numbers AS +SELECT 42 AS i, 84 AS j; +""" + ) + duckdb_to_parquet( + query="""SELECT * FROM sandbox.numbers""", + path=PATH, + duckdb_credentials=DUCKDB_CREDS, + if_exists="replace", + ) + + assert Path(PATH).exists() + Path(PATH).unlink() diff --git a/tests/integration/orchestration/prefect/flows/test_duckdb_to_sql_server.py b/tests/integration/orchestration/prefect/flows/test_duckdb_to_sql_server.py new file mode 100644 index 000000000..7eee9f786 --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_duckdb_to_sql_server.py @@ -0,0 +1,50 @@ +from pathlib import Path + +import pytest +from viadot.orchestration.prefect.flows import duckdb_to_sql_server +from viadot.orchestration.prefect.tasks import sql_server_query +from viadot.sources import DuckDB, SQLServer + + +TABLE = "test_table" +SCHEMA = "sandbox" +DATABASE_PATH = "test_db_123.duckdb" +DUCKDB_CREDS = {"database": DATABASE_PATH, "read_only": False} + + +@pytest.fixture() +def sql_server(): + # Initialize the SQLServer instance with the test credentials. 
+ return SQLServer(config_key="sql_server") + + +@pytest.fixture() +def duckdb(): + # Initialize the DuckDB instance with the test credentials. + duckdb = DuckDB(credentials=DUCKDB_CREDS) + duckdb.run_query( + query=""" +CREATE SCHEMA sandbox; +CREATE or replace TABLE sandbox.numbers AS +SELECT 42 AS i, 84 AS j; +""" + ) + yield duckdb + Path(DATABASE_PATH).unlink() + + +def test_duckdb_to_sql_server(duckdb, sql_server): # noqa: ARG001 + duckdb_to_sql_server( + query="select * from sandbox.numbers", + local_path="testing.csv", + db_table=TABLE, + db_schema=SCHEMA, + duckdb_credentials=DUCKDB_CREDS, + sql_server_credentials_secret="sql-server", # noqa: S106 + ) + assert sql_server.exists(table=TABLE, schema=SCHEMA) + + sql_server_query( + query=f"""DROP TABLE {SCHEMA}.{TABLE}""", + credentials_secret="sql-server", # noqa: S106 + ) diff --git a/tests/integration/orchestration/prefect/flows/test_duckdb_transform.py b/tests/integration/orchestration/prefect/flows/test_duckdb_transform.py new file mode 100644 index 000000000..e7441e505 --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_duckdb_transform.py @@ -0,0 +1,36 @@ +from pathlib import Path + +import pytest +from viadot.orchestration.prefect.flows import duckdb_transform +from viadot.sources import DuckDB + + +TABLE = "test_table" +SCHEMA = "test_schema" +DATABASE_PATH = "test_db_123.duckdb" +DUCKDB_CREDS = {"database": DATABASE_PATH, "read_only": False} + + +@pytest.fixture(scope="module") +def duckdb(): + duckdb = DuckDB(credentials=DUCKDB_CREDS) + yield duckdb + Path(DATABASE_PATH).unlink() + + +def test_duckdb_transform(duckdb): + duckdb_transform(f"DROP SCHEMA IF EXISTS {SCHEMA}", duckdb_credentials=DUCKDB_CREDS) + duckdb_transform(f"CREATE SCHEMA {SCHEMA}", duckdb_credentials=DUCKDB_CREDS) + duckdb_transform( + f"""CREATE TABLE {SCHEMA}.{TABLE} ( + i INTEGER NOT NULL, + decimalnr DOUBLE CHECK (decimalnr < 10), + date DATE UNIQUE, + time TIMESTAMP);""", + duckdb_credentials=DUCKDB_CREDS, + ) + assert SCHEMA in duckdb.schemas + duckdb_transform( + f"DROP SCHEMA IF EXISTS {SCHEMA} CASCADE", duckdb_credentials=DUCKDB_CREDS + ) + assert SCHEMA not in duckdb.schemas diff --git a/tests/integration/orchestration/prefect/flows/test_exchange_rates.py b/tests/integration/orchestration/prefect/flows/test_exchange_rates.py new file mode 100644 index 000000000..633a38734 --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_exchange_rates.py @@ -0,0 +1,47 @@ +from viadot.orchestration.prefect.flows import ( + exchange_rates_to_adls, + exchange_rates_to_databricks, +) +from viadot.sources import AzureDataLake, Databricks + + +TEST_SCHEMA = "test_viadot_schema" +TEST_TABLE = "test" + + +def test_exchange_rates_to_adls( + TEST_FILE_PATH, exchange_rates_config_key, adls_credentials_secret +): + lake = AzureDataLake(config_key="adls_test") + + assert not lake.exists(TEST_FILE_PATH) + + exchange_rates_to_adls( + adls_path=TEST_FILE_PATH, + exchange_rates_config_key=exchange_rates_config_key, + adls_credentials_secret=adls_credentials_secret, + ) + + assert lake.exists(TEST_FILE_PATH) + + lake.rm(TEST_FILE_PATH) + + +def test_exchange_rates_to_databricks( + exchange_rates_config_key, databricks_credentials_secret +): + databricks = Databricks(config_key="databricks-qa-elt") + assert not databricks._check_if_table_exists(schema=TEST_SCHEMA, table=TEST_TABLE) + + exchange_rates_to_databricks( + databricks_schema=TEST_SCHEMA, + databricks_table=TEST_TABLE, + exchange_rates_config_key=exchange_rates_config_key, + 
databricks_credentials_secret=databricks_credentials_secret, + ) + + assert databricks._check_if_table_exists(schema=TEST_SCHEMA, table=TEST_TABLE) + + databricks.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) + + databricks.session.stop() diff --git a/tests/integration/orchestration/prefect/flows/test_genesys_to_adls.py b/tests/integration/orchestration/prefect/flows/test_genesys_to_adls.py new file mode 100644 index 000000000..a0f8c1a51 --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_genesys_to_adls.py @@ -0,0 +1,19 @@ +from viadot.orchestration.prefect.flows import genesys_to_adls + + +def test_genesys_to_adls(): + state = genesys_to_adls( + azure_key_vault_secret="genesys-access-1", # noqa: S106 + verbose=True, + endpoint="routing_queues_members", + post_data_list=[""], + queues_ids=[ + "25e29c3e-ba33-4556-a78b-2abc40ec9699", + "f4ef329a-d903-41f4-ad4d-876a753adf3c", + ], + drop_duplicates=True, + adls_azure_key_vault_secret="app-azure-cr-datalakegen2", # noqa: S106 + adls_path="raw/dyvenia_sandbox/genesys/genesys_agents.csv", + adls_path_overwrite=True, + ) + assert state.is_successful() diff --git a/tests/integration/orchestration/prefect/flows/test_sap_to_parquet.py b/tests/integration/orchestration/prefect/flows/test_sap_to_parquet.py new file mode 100644 index 000000000..0d8c42a3c --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_sap_to_parquet.py @@ -0,0 +1,29 @@ +from pathlib import Path + +import pandas as pd +from viadot.orchestration.prefect.flows import sap_to_parquet + + +PATH = "test_path.parquet" +SAP_CREDS = "sap-dev" + + +def test_sap_to_parquet(): + assert not Path(PATH).exists() + + sap_to_parquet( + path=PATH, + query="""SELECT MATKL, MTART, ERSDA FROM MARA LIMIT 100""", + func="RFC_READ_TABLE", + rfc_total_col_width_character_limit=400, + sap_credentials_secret=SAP_CREDS, + ) + + assert Path(PATH).exists() + + df = pd.read_parquet(PATH) + n_row = 100 + n_col = 5 + + assert (n_row, n_col) == df.shape + Path(PATH).unlink() diff --git a/tests/integration/orchestration/prefect/flows/test_sap_to_redshift_spectrum.py b/tests/integration/orchestration/prefect/flows/test_sap_to_redshift_spectrum.py new file mode 100644 index 000000000..c83bf34f2 --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_sap_to_redshift_spectrum.py @@ -0,0 +1,30 @@ +import os + +from viadot.orchestration.prefect.flows import sap_to_redshift_spectrum +from viadot.sources import RedshiftSpectrum + + +S3_BUCKET = os.environ.get("S3_BUCKET") +TEST_SCHEMA = "raw_test" +TEST_TABLE = "test_sap_to_redshift_spectrum" + + +def test_sap_to_redshift_spectrum(): + sap_to_redshift_spectrum( + to_path=f"s3://{S3_BUCKET}/nesso/{TEST_SCHEMA}/{TEST_TABLE}", + schema_name=TEST_SCHEMA, + table=TEST_TABLE, + query="SELECT MATKL, MTART, ERSDA FROM MARA WHERE ERSDA = '20221230'", + func="BBP_RFC_READ_TABLE", + sap_config_key="sap_test", + ) + + rs = RedshiftSpectrum() + + received_df = rs.to_df(database=TEST_SCHEMA, table=TEST_TABLE) + + n_cols = 3 + assert not received_df.empty + assert len(received_df.columns) == n_cols + + rs.rm(database=TEST_SCHEMA, table=TEST_TABLE) diff --git a/tests/integration/orchestration/prefect/flows/test_sharepoint_to_adls.py b/tests/integration/orchestration/prefect/flows/test_sharepoint_to_adls.py new file mode 100644 index 000000000..18e88bfd4 --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_sharepoint_to_adls.py @@ -0,0 +1,25 @@ +from viadot.orchestration.prefect.flows import sharepoint_to_adls +from 
viadot.sources import AzureDataLake + + +def test_sharepoint_to_adls( + sharepoint_url, + TEST_FILE_PATH, + adls_credentials_secret, + sharepoint_credentials_secret, +): + lake = AzureDataLake(config_key="adls_test") + + assert not lake.exists(TEST_FILE_PATH) + + sharepoint_to_adls( + sharepoint_url=sharepoint_url, + adls_path=TEST_FILE_PATH, + columns="A:B", + sharepoint_credentials_secret=sharepoint_credentials_secret, + adls_credentials_secret=adls_credentials_secret, + ) + + assert lake.exists(TEST_FILE_PATH) + + lake.rm(TEST_FILE_PATH) diff --git a/tests/integration/orchestration/prefect/flows/test_sharepoint_to_redshift_spectrum.py b/tests/integration/orchestration/prefect/flows/test_sharepoint_to_redshift_spectrum.py new file mode 100644 index 000000000..ffad95f4b --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_sharepoint_to_redshift_spectrum.py @@ -0,0 +1,29 @@ +import os + +from viadot.orchestration.prefect.flows import sharepoint_to_redshift_spectrum +from viadot.sources import RedshiftSpectrum + + +def test_sharepoint_to_redshift_spectrum(sharepoint_url, sharepoint_credentials_secret): + s3_bucket = os.environ.get("S3_BUCKET") + test_schema = "raw_test" + test_table = "test_sharepoint_to_redshift_spectrum" + + sharepoint_to_redshift_spectrum( + sharepoint_url=sharepoint_url, + to_path=f"s3://{s3_bucket}/nesso/{test_schema}/{test_table}", + schema_name=test_schema, + table=test_table, + sharepoint_credentials_secret=sharepoint_credentials_secret, + download_all_files=False, + return_as_one_table=False, + file_sheet_mapping={}, + ) + + rs = RedshiftSpectrum() + + received_df = rs.to_df(database=test_schema, table=test_table) + + assert not received_df.empty + + rs.rm(database=test_schema, table=test_table) diff --git a/tests/integration/orchestration/prefect/flows/test_sharepoint_to_s3.py b/tests/integration/orchestration/prefect/flows/test_sharepoint_to_s3.py new file mode 100644 index 000000000..6f228867b --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_sharepoint_to_s3.py @@ -0,0 +1,32 @@ +import os +from pathlib import Path + +from viadot.orchestration.prefect.flows import sharepoint_to_s3 +from viadot.sources import S3 + + +S3_BUCKET = os.environ.get("S3_BUCKET") +TEST_SCHEMA = "raw_test" +TEST_TABLE = "test_sharepoint_to_s3" + + +def test_sharepoint_to_s3(sharepoint_url, sharepoint_config_key): + file_extension = sharepoint_url.split(".")[-1] + local_path = "sharepoint_test" + file_extension + s3_path = f"s3://{S3_BUCKET}/nesso/{TEST_SCHEMA}/{TEST_TABLE}." 
+ file_extension + + sharepoint_to_s3( + url=sharepoint_url, + local_path=local_path, + to_path=s3_path, + sharepoint_config_key=sharepoint_config_key, + ) + + Path(local_path).unlink() + + s3 = S3() + file_exists = s3.exists(path=s3_path) + + assert file_exists is True + + s3.rm(paths=[s3_path]) diff --git a/tests/integration/orchestration/prefect/flows/test_sql_server_to_minio.py b/tests/integration/orchestration/prefect/flows/test_sql_server_to_minio.py new file mode 100644 index 000000000..e66a222b0 --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_sql_server_to_minio.py @@ -0,0 +1,29 @@ +from viadot.orchestration.prefect.flows import sql_server_to_minio +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import MinIO + + +PATH = "data/tables_schemas.parquet" + + +def test_sql_server_to_minio(): + credentials = get_credentials("minio-dev") + sql_server_to_minio( + query=""" SELECT t.name as table_name + ,s.name as schema_name + FROM sys.tables t + JOIN sys.schemas s + ON t.schema_id = s.schema_id""", + path="s3://datalake-dev/data/tables_schemas.parquet", + sql_server_credentials_secret="sql-server", # noqa: S106 + minio_credentials=credentials, + basename_template="test-{i}", + if_exists="overwrite_or_ignore", + ) + + minio = MinIO(credentials=credentials) + file_exists = minio._check_if_file_exists(PATH) + + assert file_exists is True + + minio.rm(path=PATH) diff --git a/tests/integration/orchestration/prefect/flows/test_transform_and_catalog.py b/tests/integration/orchestration/prefect/flows/test_transform_and_catalog.py new file mode 100644 index 000000000..6e0008a84 --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_transform_and_catalog.py @@ -0,0 +1,31 @@ +from viadot.orchestration.prefect.flows import transform_and_catalog + + +def test_transform_and_catalog_model(dbt_repo_url, LUMA_URL): + logs = transform_and_catalog( + dbt_repo_url=dbt_repo_url, + dbt_repo_branch="luma", + dbt_project_path="dbt_luma", + dbt_target="dev", + luma_url=LUMA_URL, + metadata_kind="model", + ) + log = "\n".join(logs) + success_message = "The request was successful!" + + assert success_message in log + + +def test_transform_and_catalog_model_run(dbt_repo_url, LUMA_URL): + logs = transform_and_catalog( + dbt_repo_url=dbt_repo_url, + dbt_repo_branch="luma", + dbt_project_path="dbt_luma", + dbt_target="dev", + luma_url=LUMA_URL, + metadata_kind="model_run", + ) + log = "\n".join(logs) + success_message = "The request was successful!" 
+ + assert success_message in log diff --git a/tests/integration/orchestration/prefect/tasks/test_adls.py b/tests/integration/orchestration/prefect/tasks/test_adls.py new file mode 100644 index 000000000..6c8f17773 --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_adls.py @@ -0,0 +1,39 @@ +import pandas as pd +from prefect import flow, task +from viadot.orchestration.prefect.tasks import df_to_adls +from viadot.utils import skip_test_on_missing_extra + + +try: + from viadot.sources import AzureDataLake +except ImportError: + skip_test_on_missing_extra(source_name="AzureDataLake", extra="azure") + + +def test_df_to_adls(TEST_FILE_PATH): + lake = AzureDataLake(config_key="adls_test") + + @task + def create_df(): + return pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + + @task + def check_file_exists(path): + return lake.exists(path) + + @flow + def test_flow(): + df = create_df() + to_adls = df_to_adls( + df=df, + path=TEST_FILE_PATH, + config_key="adls_test", + overwrite=True, + ) + return check_file_exists(TEST_FILE_PATH, wait_for=to_adls) + + result = test_flow() + assert result is True + + # Cleanup. + lake.rm(TEST_FILE_PATH) diff --git a/tests/integration/orchestration/prefect/tasks/test_bcp.py b/tests/integration/orchestration/prefect/tasks/test_bcp.py new file mode 100644 index 000000000..efd24017b --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_bcp.py @@ -0,0 +1,22 @@ +from viadot.orchestration.prefect.tasks import bcp + + +SCHEMA = "sandbox" +TABLE = "test_bcp" +ERROR_TABLE = "test_bcp_error" +ERROR_LOG_FILE = "log_file.log" +TEST_CSV_FILE_PATH = "test_bcp.csv" + + +def test_bcp(): + try: + result = bcp( + credentials_secret="sql-server", # noqa: S106 + path=TEST_CSV_FILE_PATH, + schema=SCHEMA, + table=TABLE, + error_log_file_path=ERROR_LOG_FILE, + ) + except Exception: + result = False + assert result is not False diff --git a/tests/integration/orchestration/prefect/tasks/test_cloud_for_customer.py b/tests/integration/orchestration/prefect/tasks/test_cloud_for_customer.py new file mode 100644 index 000000000..4686d3d43 --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_cloud_for_customer.py @@ -0,0 +1,15 @@ +"""Test task for pulling data from CloudForCustomers and loading into ADLS.""" + +from viadot.orchestration.prefect.flows import cloud_for_customers_to_adls + + +def test_cloud_for_customers_to_adls(): + state = cloud_for_customers_to_adls( + report_url="https://my341115.crm.ondemand.com/sap/c4c/odata/ana_businessanalytics_analytics.svc/RPZ36A87743F65355C0B904A5QueryResults?$select=TDOC_PRIORITY", + filter_params={"CBTD_REF_TYPE_CODE": "(%20eq%20%27118%27)"}, + adls_path="raw/c4c/ticket/leads_link/c4c_tickets_leads_link.parquet", + overwrite=True, + cloud_for_customers_credentials_secret="aia-c4c-prod", # noqa: S106 + adls_credentials_secret="app-azure-cr-datalakegen2", # noqa: S106 + ) + assert state.is_successful() diff --git a/tests/integration/orchestration/prefect/tasks/test_databricks.py b/tests/integration/orchestration/prefect/tasks/test_databricks.py new file mode 100644 index 000000000..9215d4d9f --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_databricks.py @@ -0,0 +1,54 @@ +import contextlib + +from prefect import flow +import pytest +from viadot.exceptions import TableDoesNotExistError +from viadot.utils import skip_test_on_missing_extra + + +try: + from viadot.sources import Databricks +except ImportError: + skip_test_on_missing_extra(source_name="Databricks", extra="databricks") + 
+from viadot.orchestration.prefect.tasks import df_to_databricks + + +TEST_SCHEMA = "test_viadot_schema" +TEST_TABLE = "test" + + +@pytest.fixture(scope="session", autouse=True) +def databricks() -> Databricks: + databricks = Databricks(config_key="databricks-qa-elt") + databricks.create_schema(TEST_SCHEMA) + + yield databricks + + with contextlib.suppress(TableDoesNotExistError): + databricks.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) + with contextlib.suppress(Exception): + databricks.drop_schema(TEST_SCHEMA) + + databricks.session.stop() + + +def test_df_to_databricks( + TEST_DF, databricks: Databricks, databricks_credentials_secret +): + @flow + def test_flow(): + return df_to_databricks( + df=TEST_DF, + schema=TEST_SCHEMA, + table=TEST_TABLE, + credentials_secret=databricks_credentials_secret, + ) + + assert not databricks._check_if_table_exists(schema=TEST_SCHEMA, table=TEST_TABLE) + + test_flow() + + assert databricks._check_if_table_exists(schema=TEST_SCHEMA, table=TEST_TABLE) + + databricks.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) diff --git a/tests/integration/orchestration/prefect/tasks/test_dbt.py b/tests/integration/orchestration/prefect/tasks/test_dbt.py new file mode 100644 index 000000000..4175027b1 --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_dbt.py @@ -0,0 +1,11 @@ +from prefect import flow +from viadot.orchestration.prefect.tasks import dbt_task + + +def test_dbt_task(): + @flow + def test_flow(): + return dbt_task() + + result = test_flow() + assert result == "Hello, prefect-viadot!" diff --git a/tests/integration/orchestration/prefect/tasks/test_duckdb.py b/tests/integration/orchestration/prefect/tasks/test_duckdb.py new file mode 100644 index 000000000..b9cf469fa --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_duckdb.py @@ -0,0 +1,35 @@ +from pathlib import Path + +import pytest +from viadot.orchestration.prefect.tasks import duckdb_query +from viadot.sources import DuckDB + + +TABLE = "test_table" +SCHEMA = "test_schema" +DATABASE_PATH = "test_db_123.duckdb" +DUCKDB_CREDS = {"database": DATABASE_PATH, "read_only": False} + + +@pytest.fixture(scope="module") +def duckdb(): + duckdb = DuckDB(credentials=DUCKDB_CREDS) + yield duckdb + Path(DATABASE_PATH).unlink() + + +def test_duckdb_query(duckdb): + duckdb_query(f"DROP SCHEMA IF EXISTS {SCHEMA}", credentials=DUCKDB_CREDS) + duckdb_query( + f"""CREATE TABLE {SCHEMA}.{TABLE} ( + i INTEGER NOT NULL, + decimalnr DOUBLE CHECK (decimalnr < 10), + date DATE UNIQUE, + time TIMESTAMP);""", + duckdb_credentials=DUCKDB_CREDS, + ) + assert SCHEMA in duckdb.schemas + duckdb_query( + f"DROP SCHEMA IF EXISTS {SCHEMA} CASCADE", duckdb_credentials=DUCKDB_CREDS + ) + assert SCHEMA not in duckdb.schemas diff --git a/tests/integration/orchestration/prefect/tasks/test_exchange_rates_tasks.py b/tests/integration/orchestration/prefect/tasks/test_exchange_rates_tasks.py new file mode 100644 index 000000000..88883e305 --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_exchange_rates_tasks.py @@ -0,0 +1,61 @@ +from viadot.orchestration.prefect.tasks import exchange_rates_to_df + + +expected_df = [ + [ + "2022-10-07", + "PLN", + 0.200449, + 0.205797, + 0.180677, + 0.199348, + 1, + 1.530772, + 926.075683, + 5.038713, + 2.247641, + 2.147416, + 28.902797, + ], + [ + "2022-10-08", + "PLN", + 0.200369, + 0.205714, + 0.180732, + 0.199321, + 1, + 1.530195, + 929.700228, + 5.036524, + 2.249218, + 2.14749, + 28.891214, + ], + [ + "2022-10-09", + "PLN", + 0.200306, + 
0.205766, + 0.180915, + 0.199232, + 1, + 1.530656, + 925.417886, + 5.03927, + 2.249399, + 2.146299, + 28.910243, + ], +] + + +def test_exchange_rates_to_df_task(): + df = exchange_rates_to_df.fn( + currency="PLN", + config_key="exchange_rates_dev", + start_date="2022-10-07", + end_date="2022-10-09", + ) + df.drop(["_viadot_downloaded_at_utc"], axis=1, inplace=True) + assert df.values.tolist() == expected_df diff --git a/tests/integration/orchestration/prefect/tasks/test_git.py b/tests/integration/orchestration/prefect/tasks/test_git.py new file mode 100644 index 000000000..02c90d308 --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_git.py @@ -0,0 +1,17 @@ +from pathlib import Path +import shutil + +from loguru import logger +from viadot.orchestration.prefect.tasks import clone_repo + + +def test_clone_repo_private(AZURE_REPO_URL): + test_repo_dir = "test_repo_dir" + + assert not Path(test_repo_dir).exists() + + clone_repo.fn(url=AZURE_REPO_URL, path=test_repo_dir, logger=logger) + + assert Path(test_repo_dir).exists() + + shutil.rmtree(test_repo_dir) diff --git a/tests/integration/orchestration/prefect/tasks/test_luma.py b/tests/integration/orchestration/prefect/tasks/test_luma.py new file mode 100644 index 000000000..ebf61a755 --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_luma.py @@ -0,0 +1,49 @@ +from loguru import logger +import pytest +from viadot.orchestration.prefect.tasks import luma_ingest_task + + +@pytest.mark.asyncio() +async def test_luma_ingest_task_model_metadata(LUMA_URL): + logs = await luma_ingest_task.fn( + metadata_kind="model", + metadata_dir_path="tests/resources/metadata/model", + luma_url=LUMA_URL, + logger=logger, + raise_on_failure=False, + ) + log = "\n".join(logs) + success_message = "The request was successful!" + + assert success_message in log + + +@pytest.mark.asyncio() +async def test_luma_ingest_task_model_run_metadata(LUMA_URL): + logs = await luma_ingest_task.fn( + metadata_kind="model_run", + metadata_dir_path="tests/resources/metadata/model_run", + luma_url=LUMA_URL, + logger=logger, + raise_on_failure=False, + ) + log = "\n".join(logs) + success_message = "The request was successful!" + + assert success_message in log + + +@pytest.mark.asyncio() +async def test_luma_ingest_task_model_run_metadata_follow(LUMA_URL): + logs = await luma_ingest_task.fn( + metadata_kind="model_run", + metadata_dir_path="tests/resources/metadata/model_run", + luma_url=LUMA_URL, + follow=True, + logger=logger, + raise_on_failure=False, + ) + log = "\n".join(logs) + success_message = "The request was successful!" 
+ + assert success_message in log diff --git a/tests/integration/orchestration/prefect/tasks/test_minio.py b/tests/integration/orchestration/prefect/tasks/test_minio.py new file mode 100644 index 000000000..1d48e1864 --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_minio.py @@ -0,0 +1,20 @@ +import pandas as pd +from viadot.orchestration.prefect.tasks import df_to_minio +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import MinIO + + +PATH = "data/duckdb_test.parquet" + + +def test_df_to_minio(): + d = {"col1": [1, 2], "col2": [3, 4]} + df = pd.DataFrame(data=d) + credentials = get_credentials("minio-dev") + df_to_minio(df=df, path=PATH, credentials=credentials) + minio = MinIO(credentials=credentials) + file_exists = minio._check_if_file_exists(PATH) + + assert file_exists + + minio.rm(path=PATH) diff --git a/tests/integration/orchestration/prefect/tasks/test_redshift_spectrum.py b/tests/integration/orchestration/prefect/tasks/test_redshift_spectrum.py new file mode 100644 index 000000000..12c2d2a0b --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_redshift_spectrum.py @@ -0,0 +1,54 @@ +import os + +import pandas as pd +from prefect import flow +import pytest +from viadot.orchestration.prefect.tasks import df_to_redshift_spectrum +from viadot.utils import skip_test_on_missing_extra + + +try: + from viadot.sources import RedshiftSpectrum +except ImportError: + skip_test_on_missing_extra(source_name="RedshiftSpectrum", extra="aws") + + +S3_BUCKET = os.environ.get("S3_BUCKET") +TEST_SCHEMA = "raw_test" +TEST_TABLE = "test_sap_to_redshift_spectrum" + + +@pytest.fixture(scope="session") +def redshift(aws_config_key): + return RedshiftSpectrum(config_key=aws_config_key) + + +def test_df_to_redshift_spectrum(redshift): + df = pd.DataFrame( + [ + [0, "A"], + [1, "B"], + [2, "C"], + ], + columns=["col1", "col2"], + ) + + @flow + def test_flow(df): + df_to_redshift_spectrum( + df=df, + to_path=f"s3://{S3_BUCKET}/nesso/{TEST_SCHEMA}/{TEST_TABLE}", + schema_name=TEST_SCHEMA, + table=TEST_TABLE, + ) + + df_exists = redshift._check_if_table_exists( + schema=TEST_SCHEMA, + table=TEST_TABLE, + ) + + redshift.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) + + return df_exists + + assert test_flow(df) diff --git a/tests/integration/orchestration/prefect/tasks/test_s3.py b/tests/integration/orchestration/prefect/tasks/test_s3.py new file mode 100644 index 000000000..269a64244 --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_s3.py @@ -0,0 +1,59 @@ +import os +from pathlib import Path + +import pandas as pd +from prefect import flow +import pytest +from viadot.orchestration.prefect.tasks import s3_upload_file +from viadot.utils import skip_test_on_missing_extra + + +try: + from viadot.sources import S3 +except ImportError: + skip_test_on_missing_extra(source_name="S3", extra="aws") + +S3_BUCKET = os.environ.get("S3_BUCKET") +TEST_SCHEMA = "raw_test" +TEST_TABLE = "test_s3_upload_file" + + +@pytest.fixture(scope="session") +def s3(aws_config_key): + return S3(config_key=aws_config_key) + + +@pytest.fixture() +def TEST_FILE_PATH(): + path = "test.csv" + df = pd.DataFrame( + [ + [0, "A"], + [1, "B"], + [2, "C"], + ], + columns=["col1", "col2"], + ) + df.to_csv(path) + + yield path + + Path(path).unlink() + + +def test_s3_upload_file(s3, TEST_FILE_PATH): + @flow + def test_flow(): + file_path = f"s3://{S3_BUCKET}/nesso/{TEST_SCHEMA}/{TEST_TABLE}.csv" + s3_upload_file( + from_path=TEST_FILE_PATH, + to_path=file_path, + ) + 
+ file_exists = s3.exists(path=file_path) + + s3.rm(paths=[file_path]) + + return file_exists + + assert test_flow() is True diff --git a/tests/integration/orchestration/prefect/tasks/test_sap_rfc.py b/tests/integration/orchestration/prefect/tasks/test_sap_rfc.py new file mode 100644 index 000000000..8c2daf1b7 --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_sap_rfc.py @@ -0,0 +1,18 @@ +from prefect import flow +from viadot.orchestration.prefect.tasks import sap_rfc_to_df + + +def test_sap_rfc_to_df(): + @flow + def test_flow(): + return sap_rfc_to_df( + config_key="sap_test", + query="SELECT MATKL, MTART, ERSDA FROM MARA WHERE ERSDA = '20221230'", + func="BBP_RFC_READ_TABLE", + ) + + received_df = test_flow() + n_cols = 3 + + assert not received_df.empty + assert len(received_df.columns) == n_cols diff --git a/tests/integration/orchestration/prefect/tasks/test_sharepoint_tasks.py b/tests/integration/orchestration/prefect/tasks/test_sharepoint_tasks.py new file mode 100644 index 000000000..f558d28fa --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_sharepoint_tasks.py @@ -0,0 +1,37 @@ +import os +from pathlib import Path + +import pandas as pd +from prefect import flow +from viadot.orchestration.prefect.tasks import ( + sharepoint_download_file, + sharepoint_to_df, +) + + +DF1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + + +def test_to_df(sharepoint_url, sharepoint_config_key): + @flow + def test_to_df_flow(): + return sharepoint_to_df(url=sharepoint_url, config_key=sharepoint_config_key) + + received_df = test_to_df_flow() + assert not received_df.empty + + +def test_download_file(sharepoint_url, sharepoint_config_key): + file = "sharepoint_test" + sharepoint_url.split(".")[-1] + + @flow + def test_download_file_flow(): + return sharepoint_download_file( + url=sharepoint_url, to_path=file, config_key=sharepoint_config_key + ) + + test_download_file_flow() + + assert file in os.listdir() + + Path(file).unlink() diff --git a/tests/integration/orchestration/prefect/tasks/test_sql_server.py b/tests/integration/orchestration/prefect/tasks/test_sql_server.py new file mode 100644 index 000000000..f7fdca69c --- /dev/null +++ b/tests/integration/orchestration/prefect/tasks/test_sql_server.py @@ -0,0 +1,59 @@ +import pytest +from viadot.orchestration.prefect.tasks import ( + create_sql_server_table, + sql_server_query, + sql_server_to_df, +) +from viadot.sources import SQLServer + + +TABLE = "test" +SCHEMA = "sandbox" + + +@pytest.fixture() +def sql_server(): + # Initialize the SQLServer instance with the test credentials. 
+ return SQLServer(config_key="sql_server") + + +def test_sql_server_to_df(): + df = sql_server_to_df( + query=""" + SELECT t.name as table_name + ,s.name as schema_name + FROM sys.tables t + JOIN sys.schemas s + ON t.schema_id = s.schema_id""", + credentials_secret="sql-server", # noqa: S106 + ) + + assert not df.empty + + +def test_create_sql_server_table(sql_server): + dtypes = { + "date": "DATE", + "name": "VARCHAR(255)", + "id": "VARCHAR(255)", + "weather": "FLOAT(24)", + "rain": "FLOAT(24)", + "temp": "FLOAT(24)", + "summary": "VARCHAR(255)", + } + create_sql_server_table( + table=TABLE, + schema=SCHEMA, + dtypes=dtypes, + if_exists="replace", + credentials_secret="sql-server", # noqa: S106 + ) + + assert sql_server.exists(table=TABLE, schema=SCHEMA) + + sql_server_query( + query=f"""DROP TABLE {SCHEMA}.{TABLE}""", + credentials_secret="sql-server", # noqa: S106 + ) + + assert not sql_server.exists(table=TABLE, schema=SCHEMA) diff --git a/tests/integration/test_azure_data_lake.py b/tests/integration/test_azure_data_lake.py index 80f10dd85..3a42cbbd4 100644 --- a/tests/integration/test_azure_data_lake.py +++ b/tests/integration/test_azure_data_lake.py @@ -1,5 +1,11 @@ import pandas as pd -from viadot.sources import AzureDataLake +from viadot.utils import skip_test_on_missing_extra + + +try: + from viadot.sources import AzureDataLake +except ImportError: + skip_test_on_missing_extra(source_name="AzureDataLake", extra="azure") def test_upload_csv(TEST_CSV_FILE_PATH, TEST_ADLS_FILE_PATH_CSV): diff --git a/tests/integration/test_cloud_for_customers.py b/tests/integration/test_cloud_for_customers.py index d6223c8ef..199eb820c 100644 --- a/tests/integration/test_cloud_for_customers.py +++ b/tests/integration/test_cloud_for_customers.py @@ -1,13 +1,11 @@ -"""Tests for CloudForCustomers source class""" +"""Tests for CloudForCustomers source class.""" from datetime import datetime, timedelta import pandas as pd -import pytest - -from viadot.exceptions import CredentialError from viadot.sources.cloud_for_customers import CloudForCustomers + CONTACT_ENDPOINT = "ContactCollection" cursor_field = "EntityLastChangedOn" yesterday = datetime.utcnow() - timedelta(days=10) @@ -16,28 +14,6 @@ filter_params = {"$filter": cursor_filter} -def test_is_configured(): - c4c = CloudForCustomers( - credentials={"username": "test_user", "password": "test_password"}, - ) - assert c4c - - -def test_is_configured_throws_credential_error(): - with pytest.raises(CredentialError): - _ = CloudForCustomers( - credentials={"username": "test_user", "password": None}, - ) - with pytest.raises(CredentialError): - _ = CloudForCustomers( - credentials={"username": None, "password": "test_password"}, - ) - with pytest.raises(CredentialError): - _ = CloudForCustomers( - credentials={"username": None, "password": None}, - ) - - def test_to_df(c4c_config_key): c4c = CloudForCustomers( config_key=c4c_config_key, diff --git a/tests/integration/test_databricks.py b/tests/integration/test_databricks.py index 03c3b1d74..15ce93707 100644 --- a/tests/integration/test_databricks.py +++ b/tests/integration/test_databricks.py @@ -1,20 +1,16 @@ +import contextlib + import pandas as pd import pytest +from viadot.exceptions import TableDoesNotExistError +from viadot.utils import add_viadot_metadata_columns, skip_test_on_missing_extra -from viadot.exceptions import TableDoesNotExist -from viadot.utils import add_viadot_metadata_columns try: from pyspark.sql.utils import AnalysisException - from viadot.sources import Databricks - - 
_databricks_installed = True except ImportError: - _databricks_installed = False - -if not _databricks_installed: - pytest.skip("Databricks source not installed", allow_module_level=True) + skip_test_on_missing_extra(source_name="Databricks", extra="databricks") TEST_SCHEMA = "viadot_test_schema" @@ -66,33 +62,26 @@ def databricks(databricks_config_key): config_key=databricks_config_key, ) - try: + with contextlib.suppress(Exception): databricks.drop_schema(TEST_SCHEMA) databricks.drop_table(TEST_TABLE) - except Exception: - pass databricks.create_schema(TEST_SCHEMA) yield databricks - try: + with contextlib.suppress(TableDoesNotExistError): databricks.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) - except TableDoesNotExist: - pass - try: + + with contextlib.suppress(Exception): databricks.drop_schema(TEST_SCHEMA) - except Exception: - pass databricks.session.stop() @pytest.mark.dependency() def test_create_schema(databricks): - try: + with contextlib.suppress(AnalysisException): databricks.drop_schema(TEST_SCHEMA_2) - except AnalysisException: - pass exists = databricks._check_if_schema_exists(TEST_SCHEMA_2) assert exists is False @@ -103,10 +92,8 @@ def test_create_schema(databricks): exists = databricks._check_if_schema_exists(TEST_SCHEMA_2) assert exists is True - try: + with contextlib.suppress(Exception): databricks.drop_schema(TEST_SCHEMA_2) - except Exception: - pass @pytest.mark.dependency(depends=["test_create_schema"]) @@ -137,10 +124,8 @@ def test_create_table(databricks): assert exists is True # Cleanup. - try: + with contextlib.suppress(Exception): databricks.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) - except Exception: - pass @pytest.mark.dependency(depends=["test_create_table"]) @@ -170,7 +155,7 @@ def test_to_df(databricks): schema=TEST_SCHEMA, table=TEST_TABLE, df=TEST_DF, if_exists="skip" ) - df = databricks.to_df(f"SELECT * FROM {FQN}") + df = databricks.to_df(f"SELECT * FROM {FQN}") # noqa: S608 # Note that all `to_df()` methods are decorated with `@add_viadot_metadata_columns`. # This means that we need to add the metadata columns to the test DataFrame as well @@ -181,8 +166,7 @@ class Fake: def to_df(self): return TEST_DF - test_df = Fake().to_df() - return test_df + return Fake().to_df() test_df = fake_test_df_to_df() assert df.shape == test_df.shape @@ -193,10 +177,8 @@ def to_df(self): @pytest.mark.dependency() def test_create_table_replace(databricks): # Setup. 
- try: + with contextlib.suppress(Exception): databricks.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) - except Exception: - pass exists = databricks._check_if_table_exists(schema=TEST_SCHEMA, table=TEST_TABLE) assert exists is False @@ -254,7 +236,8 @@ def test_snakecase_column_names(databricks): assert created is True retrieved_value = to_df_no_metadata_cols( - databricks, query=f"SELECT column_to___snake___case FROM {FQN}" + databricks, + query=f"SELECT column_to___snake___case FROM {FQN}", # noqa: S608 ) assert list(retrieved_value) == ["column_to___snake___case"] @@ -267,7 +250,8 @@ def test_snakecase_column_names(databricks): assert updated is True retrieved_value_update = to_df_no_metadata_cols( - databricks, query=f"SELECT column_to___snake___case_22 FROM {FQN}" + databricks, + query=f"SELECT column_to___snake___case_22 FROM {FQN}", # noqa: S608 ) assert list(retrieved_value_update) == ["column_to___snake___case_22"] @@ -288,7 +272,7 @@ def test_create_table_from_pandas_handles_mixed_types(databricks): databricks.drop_schema(TEST_SCHEMA) -# @pytest.mark.dependency(depends=["test_create_table", "test_drop_table", "test_to_df"]) +# @pytest.mark.dependency(depends=["test_create_table", "test_drop_table", "test_to_df"]) # noqa: W505 # def test_insert_into_append(databricks): # databricks.create_table_from_pandas( @@ -307,7 +291,7 @@ def test_create_table_from_pandas_handles_mixed_types(databricks): # databricks.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) -# @pytest.mark.dependency(depends=["test_create_table", "test_drop_table", "test_to_df"]) +# @pytest.mark.dependency(depends=["test_create_table", "test_drop_table", "test_to_df"]) # noqa: W505 # def test_insert_into_replace(databricks): # databricks.create_table_from_pandas( @@ -346,7 +330,7 @@ def test_create_table_from_pandas_handles_mixed_types(databricks): # ) -# @pytest.mark.dependency(depends=["test_create_table", "test_drop_table", "test_to_df"]) +# @pytest.mark.dependency(depends=["test_create_table", "test_drop_table", "test_to_df"]) # noqa: W505 # def test_upsert(databricks): # databricks.create_table_from_pandas( @@ -395,13 +379,13 @@ def test_create_table_from_pandas_handles_mixed_types(databricks): # databricks.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) -# @pytest.mark.dependency(depends=["test_create_table", "test_drop_table", "test_to_df"]) +# @pytest.mark.dependency(depends=["test_create_table", "test_drop_table", "test_to_df"]) # noqa: W505 # def test_rollback(databricks): -# databricks.create_table_from_pandas(schema=TEST_SCHEMA, table=TEST_TABLE, df=TEST_DF) +# databricks.create_table_from_pandas(schema=TEST_SCHEMA, table=TEST_TABLE, df=TEST_DF) # noqa: W505 # # Get the version of the table before applying any changes -# version_number = databricks.get_table_version(schema=TEST_SCHEMA, table=TEST_TABLE) +# version_number = databricks.get_table_version(schema=TEST_SCHEMA, table=TEST_TABLE) # noqa: W505 # # Append to the table # appended = databricks.insert_into( @@ -410,7 +394,7 @@ def test_create_table_from_pandas_handles_mixed_types(databricks): # assert appended # # Rollback to the previous table version -# databricks.rollback(schema=TEST_SCHEMA, table=TEST_TABLE, version_number=version_number) +# databricks.rollback(schema=TEST_SCHEMA, table=TEST_TABLE, version_number=version_number) # noqa: W505 # result = databricks.to_df(f"SELECT * FROM {FQN}") # assert df.shape == result.shape diff --git a/tests/integration/test_exchange_rates.py b/tests/integration/test_exchange_rates.py index 
e35cd27d3..ece18a883 100644 --- a/tests/integration/test_exchange_rates.py +++ b/tests/integration/test_exchange_rates.py @@ -2,6 +2,7 @@ import pytest from viadot.sources import ExchangeRates + TEST_DATA = { "currencies": [ { @@ -42,14 +43,13 @@ @pytest.fixture(scope="session") def exchange_rates(): - e = ExchangeRates( + return ExchangeRates( currency="PLN", start_date="2022-10-09", end_date="2022-10-11", symbols=["USD", "EUR", "GBP", "CHF", "PLN", "DKK"], config_key="exchange_rates_dev", ) - yield e def test_to_json_values(exchange_rates): diff --git a/tests/integration/test_genesys.py b/tests/integration/test_genesys.py deleted file mode 100644 index bb598f7c4..000000000 --- a/tests/integration/test_genesys.py +++ /dev/null @@ -1,218 +0,0 @@ -from unittest import mock - -import pytest - -from viadot.sources import Genesys - - -@pytest.fixture -def var_dictionary(): - variables = { - "start_date": "2022-08-12", - "media_type_list": ["callback", "chat"], - "queueIds_list": [ - "1234567890", - "1234567890", - ], - "data_to_post": """{ - "name": f"QUEUE_PERFORMANCE_DETAIL_VIEW_{media}", - "timeZone": "UTC", - "exportFormat": "CSV", - "interval": f"{end_date}T23:00:00/{start_date}T23:00:00", - "period": "PT30M", - "viewType": f"QUEUE_PERFORMANCE_DETAIL_VIEW", - "filter": {"mediaTypes": [f"{media}"], "queueIds": [f"{queueid}"], "directions":["inbound"],}, - "read": True, - "locale": "en-us", - "hasFormatDurations": False, - "hasSplitFilters": True, - "excludeEmptyRows": True, - "hasSplitByMedia": True, - "hasSummaryRow": True, - "csvDelimiter": "COMMA", - "hasCustomParticipantAttributes": True, - "recipientEmails": [], - }""", - "report_data": [ - [ - "1234567890qwertyuiopasdfghjklazxcvbn", - "https://apps.mypurecloud.de/example/url/test", - "1234567890qwertyuiopasdfghjklazxcvbn", - "chat", - "QUEUE_PERFORMANCE_DETAIL_VIEW", - "2022-08-12T23:00:00.000Z/2022-08-13T23:00:00.000Z", - "COMPLETED", - ], - [ - "1234567890qwertyuiopasdfghjklazxcvbn", - "https://apps.mypurecloud.de/example/url/test", - "1234567890qwertyuiopasdfghjklazxcvbn", - "chat", - "QUEUE_PERFORMANCE_DETAIL_VIEW", - "2022-08-12T23:00:00.000Z/2022-08-13T23:00:00.000Z", - "COMPLETED", - ], - [ - "1234567890qwertyuiopasdfghjklazxcvbn", - "https://apps.mypurecloud.de/example/url/test", - "1234567890qwertyuiopasdfghjklazxcvbn", - "callback", - "QUEUE_PERFORMANCE_DETAIL_VIEW", - "2022-08-12T23:00:00.000Z/2022-08-13T23:00:00.000Z", - "COMPLETED", - ], - [ - "1234567890qwertyuiopasdfghjklazxcvbn", - "https://apps.mypurecloud.de/example/url/test", - "1234567890qwertyuiopasdfghjklazxcvbn", - "callback", - "QUEUE_PERFORMANCE_DETAIL_VIEW", - "2022-08-12T23:00:00.000Z/2022-08-13T23:00:00.000Z", - "COMPLETED", - ], - ], - "entities": { - "entities": [ - { - "id": "1234567890", - "name": "QUEUE_PERFORMANCE_DETAIL_VIEW_chat", - "runId": "1234567890", - "status": "COMPLETED", - "timeZone": "UTC", - "exportFormat": "CSV", - "interval": "2022-08-02T23:00:00.000Z/2022-08-03T23:00:00.000Z", - "downloadUrl": "https://apps.mypurecloud.de/example/url/test", - "viewType": "QUEUE_PERFORMANCE_DETAIL_VIEW", - "period": "PT30M", - "filter": { - "mediaTypes": ["chat"], - "queueIds": ["1234567890"], - "directions": ["inbound"], - }, - "read": False, - "createdDateTime": "2022-08-03T11:19:47Z", - "modifiedDateTime": "2022-08-03T11:19:49Z", - "locale": "en-us", - "percentageComplete": 1.0, - "hasFormatDurations": False, - "hasSplitFilters": True, - "excludeEmptyRows": True, - "hasSplitByMedia": True, - "hasSummaryRow": True, - "csvDelimiter": "COMMA", - 
"hasCustomParticipantAttributes": True, - "dateLastConfirmed": "2022-08-03T11:19:47Z", - "intervalKeyType": "ConversationStart", - "enabled": False, - "selfUri": "/api/v2/example/url/test", - }, - ], - "pageSize": 100, - "pageNumber": 1, - "total": 6, - "pageCount": 1, - }, - "ids_mapping": {"1234567890qwertyuiopasdfghjklazxcvbn": "TEST"}, - } - - return variables - - -@pytest.fixture(scope="session") -def genesys(): - g = Genesys(config_key="genesys", schedule_id="1234", environment="test") - - yield g - - -class MockClass: - status_code = 200 - - def json(): - test = {"token_type": None, "access_token": None} - return test - - -@pytest.mark.init -def test_create_genesys_class(genesys): - assert genesys - - -@pytest.mark.init -def test_default_credential_param(genesys): - assert genesys.credentials is not None and isinstance(genesys.credentials, dict) - - -@pytest.mark.init -def test_environment_param(genesys): - assert genesys.environment is not None and isinstance(genesys.environment, str) - - -@pytest.mark.init -def test_schedule_id_param(genesys): - assert genesys.schedule_id is not None and isinstance(genesys.schedule_id, str) - - -@pytest.mark.parametrize("input_name", ["test_name", "12345", ".##@@"]) -@pytest.mark.init -def test_other_inicial_params(input_name): - g = Genesys(report_name=input_name, config_key="genesys") - assert len(g.report_name) > 0 and isinstance(g.report_name, str) - - -@pytest.mark.proper -def test_connection_with_genesys_api(genesys): - test_genesys_connection = genesys.authorization_token - assert ( - isinstance(test_genesys_connection, dict) - and len(test_genesys_connection.items()) > 0 - ) - - -@mock.patch.object(Genesys, "genesys_generate_exports") -@pytest.mark.connection -def test_generate_exports(mock_api_response, var_dictionary, genesys): - assert genesys.genesys_generate_exports() - mock_api_response.assert_called() - - -@mock.patch.object(Genesys, "load_reporting_exports") -@pytest.mark.dependency(["test_generate_exports"]) -@pytest.mark.generate -def test_generate_reports_list(mock_load_reports, var_dictionary, genesys): - mock_load_reports.return_value = var_dictionary["entities"] - genesys.get_reporting_exports_data() - mock_load_reports.assert_called_once() - - -@mock.patch.object(Genesys, "download_report") -@pytest.mark.dependency( - depends=[ - "test_generate_exports", - "test_generate_reports_list", - ] -) -@pytest.mark.download -def test_download_reports(mock_download_files, var_dictionary, genesys): - genesys.ids_mapping = var_dictionary["ids_mapping"] - genesys.report_data = var_dictionary["report_data"] - genesys.start_date = var_dictionary["start_date"] - file_name_list = genesys.download_all_reporting_exports() - - assert isinstance(file_name_list, list) and len(file_name_list) > 0 - mock_download_files.assert_called() - - -@mock.patch("viadot.sources.genesys.handle_api_response", return_value=MockClass) -@pytest.mark.dependency( - depends=[ - "test_generate_exports", - "test_generate_reports_list", - "test_download_reports", - ] -) -@pytest.mark.delete -def test_genesys_delete_reports(mock_api_response, var_dictionary, genesys): - genesys.report_data = var_dictionary["report_data"] - genesys.delete_all_reporting_exports() - mock_api_response.assert_called() diff --git a/tests/integration/test_hubspot.py b/tests/integration/test_hubspot.py new file mode 100644 index 000000000..22c526046 --- /dev/null +++ b/tests/integration/test_hubspot.py @@ -0,0 +1,13 @@ +from viadot.orchestration.prefect.flows import hubspot_to_adls + + +def 
test_hubspot_to_adls(hubspot_config_key, adls_credentials_secret): + state = hubspot_to_adls( + config_key=hubspot_config_key, + endpoint="hubdb/api/v2/tables/6009756/rows/draft", + nrows=1000000, + adls_azure_key_vault_secret=adls_credentials_secret, + adls_path="raw/dyvenia_sandbox/genesys/hubspot_rial.parquet", + adls_path_overwrite=True, + ) + assert state.is_successful() diff --git a/tests/integration/test_mindful.py b/tests/integration/test_mindful.py new file mode 100644 index 000000000..ad1dfa547 --- /dev/null +++ b/tests/integration/test_mindful.py @@ -0,0 +1,20 @@ +from datetime import date, timedelta + +from viadot.orchestration.prefect.flows import mindful_to_adls + + +start_date = date.today() - timedelta(days=2) +end_date = start_date + timedelta(days=1) +date_interval = [start_date, end_date] + + +def test_mindful_to_adls(mindful_config_key, adls_credentials_secret): + state = mindful_to_adls( + azure_key_vault_secret=mindful_config_key, + endpoint="responses", + date_interval=date_interval, + adls_path="raw/dyvenia_sandbox/mindful", + adls_azure_key_vault_secret=adls_credentials_secret, + adls_path_overwrite=True, + ) + assert state.is_successful() diff --git a/tests/integration/test_minio.py b/tests/integration/test_minio.py index e05f58df1..6a5b07da2 100644 --- a/tests/integration/test_minio.py +++ b/tests/integration/test_minio.py @@ -1,6 +1,14 @@ +from contextlib import nullcontext as does_not_raise + import pytest +from viadot.utils import skip_test_on_missing_extra + + +try: + from viadot.sources import MinIO +except ImportError: + skip_test_on_missing_extra(source_name="MinIO", extra="aws") -from viadot.sources import MinIO TEST_BUCKET = "spark" TEST_SCHEMA = "test_schema" @@ -17,14 +25,12 @@ def minio(minio_config_key): minio = MinIO(config_key=minio_config_key) minio.rm(TEST_TABLE_FILE_PATH) - yield minio + return minio def test_check_connection(minio): - try: + with does_not_raise(): minio.check_connection() - except Exception as e: - assert False, f"Exception:\n{e}" def test_from_df(minio, DF): diff --git a/tests/integration/test_outlook.py b/tests/integration/test_outlook.py new file mode 100644 index 000000000..e631e4d3a --- /dev/null +++ b/tests/integration/test_outlook.py @@ -0,0 +1,16 @@ +from viadot.config import get_source_config +from viadot.orchestration.prefect.flows import outlook_to_adls + + +def test_outlook_to_adls(adls_credentials_secret, outlook_config_key): + mailbox = get_source_config("outlook").get("mailbox") + state = outlook_to_adls( + config_key=outlook_config_key, + mailbox_name=mailbox, + start_date="2023-04-12", + end_date="2023-04-13", + adls_azure_key_vault_secret=adls_credentials_secret, + adls_path=f"raw/dyvenia_sandbox/genesys/{mailbox.split('@')[0].replace('.', '_').replace('-', '_')}.csv", + adls_path_overwrite=True, + ) + assert state.is_successful() diff --git a/tests/integration/test_redshift_spectrum.py b/tests/integration/test_redshift_spectrum.py index 33fb837d0..c5593d662 100644 --- a/tests/integration/test_redshift_spectrum.py +++ b/tests/integration/test_redshift_spectrum.py @@ -1,9 +1,16 @@ +import contextlib import os import pandas as pd import pytest +from viadot.utils import skip_test_on_missing_extra + + +try: + from viadot.sources import S3, RedshiftSpectrum +except ImportError: + skip_test_on_missing_extra(source_name="RedshiftSpectrum", extra="aws") -from viadot.sources import S3, RedshiftSpectrum TEST_DF = pd.DataFrame( [ @@ -26,22 +33,16 @@ def redshift(redshift_config_key): yield redshift - try: + with 
contextlib.suppress(Exception): redshift.drop_schema(TEST_SCHEMA, drop_glue_database=True) - except Exception: - pass - try: + with contextlib.suppress(AttributeError): redshift._con.close() - except AttributeError: - pass @pytest.fixture(scope="session") def s3(s3_config_key): - s3 = S3(config_key=s3_config_key) - - yield s3 + return S3(config_key=s3_config_key) def test_create_schema(redshift): @@ -130,7 +131,7 @@ def test_from_df(redshift, s3): def test_from_df_no_table_folder_in_to_path(redshift, s3): - """Test that the table folder is created if it's not specified in `to_path`""" + """Test that the table folder is created if it's not specified in `to_path`.""" # Assumptions. table_exists = redshift._check_if_table_exists( schema=TEST_SCHEMA, diff --git a/tests/integration/test_s3.py b/tests/integration/test_s3.py index 63ca21174..53777a0d3 100644 --- a/tests/integration/test_s3.py +++ b/tests/integration/test_s3.py @@ -1,9 +1,18 @@ +import contextlib import os import pandas as pd import pytest +from viadot.utils import skip_test_on_missing_extra + + +try: + from viadot.sources import S3 +except ImportError: + skip_test_on_missing_extra(source_name="S3", extra="aws") + +from pathlib import Path -from viadot.sources import S3 SOURCE_DATA = [ { @@ -27,8 +36,8 @@ TEST_SCHEMA = "test_schema" TEST_TABLE = "test_table" TEST_TABLE_PATH = f"s3://{S3_BUCKET}/viadot/{TEST_SCHEMA}/{TEST_TABLE}" -TEST_TABLE_PATH_PARQUET = os.path.join(TEST_TABLE_PATH, f"{TEST_TABLE}.parquet") -TEST_TABLE_PATH_CSV = os.path.join(TEST_TABLE_PATH, f"{TEST_TABLE}.csv") +TEST_TABLE_PATH_PARQUET = str(Path(TEST_TABLE_PATH) / f"{TEST_TABLE}.parquet") +TEST_TABLE_PATH_CSV = str(Path(TEST_TABLE_PATH) / f"{TEST_TABLE}.csv") @pytest.fixture(scope="session") @@ -38,10 +47,8 @@ def s3(s3_config_key): yield s3 # Remove the s3 table folder. - try: + with contextlib.suppress(Exception): s3.rm(path=TEST_TABLE_PATH) - except Exception: - pass def test_from_df(s3): @@ -136,8 +143,7 @@ def test_upload(s3, TEST_CSV_FILE_PATH): def test_download(s3, TEST_CSV_FILE_PATH): # Assumptions. - downloaded = os.path.exists("test.csv") - assert downloaded is False + assert not Path("test.csv").exists() s3.upload( from_path=TEST_CSV_FILE_PATH, @@ -152,12 +158,10 @@ def test_download(s3, TEST_CSV_FILE_PATH): to_path="test.csv", ) - downloaded = os.path.exists("test.csv") - - assert downloaded is True + assert Path("test.csv").exists() # Cleanup. - os.remove("test.csv") + Path("test.csv").unlink() s3.rm(path=TEST_TABLE_PATH) diff --git a/tests/integration/test_sharepoint.py b/tests/integration/test_sharepoint.py index 6c044adbf..620921349 100644 --- a/tests/integration/test_sharepoint.py +++ b/tests/integration/test_sharepoint.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import pytest from viadot.exceptions import CredentialError @@ -57,4 +58,4 @@ def test_download_file(sharepoint_url, sharepoint_config_key): assert file in os.listdir() - os.remove(file) + Path(file).unlink() diff --git a/tests/integration/test_sql_server.py b/tests/integration/test_sql_server.py new file mode 100644 index 000000000..1d59a9028 --- /dev/null +++ b/tests/integration/test_sql_server.py @@ -0,0 +1,35 @@ +from datetime import datetime, timedelta, timezone +import struct + +import pytest +from viadot.sources import SQLServer + + +@pytest.fixture() +def sql_server(): + # Initialize the SQLServer instance with the test credentials. + return SQLServer(config_key="sql_server") + + +def test_handle_datetimeoffset(): + # Example test data for a datetimeoffset value. 
+ dto_value = struct.pack("<6hI2h", 2021, 7, 3, 14, 30, 15, 123456000, 2, 0) + expected_datetime = datetime( + 2021, 7, 3, 14, 30, 15, 123456, tzinfo=timezone(timedelta(hours=2)) + ) + + result = SQLServer._handle_datetimeoffset(dto_value) + assert result == expected_datetime + + +def test_schemas(sql_server): + schemas = sql_server.schemas + assert "dbo" in schemas # Assuming 'dbo' schema exists in the test database. + + +def test_exists(sql_server): + sql_server_table = sql_server.tables + sample_table_schema = sql_server_table[0].split(".") + sample_schema = sample_table_schema[0] + sample_table = sample_table_schema[1] + assert sql_server.exists(table=sample_table, schema=sample_schema) diff --git a/tests/integration/test_trino.py b/tests/integration/test_trino.py index feee2a5bf..67cf0adc3 100644 --- a/tests/integration/test_trino.py +++ b/tests/integration/test_trino.py @@ -1,8 +1,10 @@ +import contextlib + import pyarrow as pa import pytest - from viadot.sources import Trino + TEST_BUCKET = "spark" TEST_SCHEMA = "test_schema" TEST_SCHEMA_PATH = f"s3a://{TEST_BUCKET}/{TEST_SCHEMA}" @@ -14,12 +16,9 @@ def trino(trino_config_key): trino = Trino(config_key=trino_config_key) - try: + with contextlib.suppress(Exception): trino.drop_schema(TEST_SCHEMA, cascade=True) - except Exception: - pass - - yield trino + return trino def test_get_schemas(trino): @@ -54,18 +53,17 @@ def test_drop_schema(trino): def test__create_table_query_basic(trino, DF): """Test that the most basic create table query is construed as expected.""" - pa_table = pa.Table.from_pandas(DF) - TEST_DF_COLUMNS = pa_table.schema.names - TEST_DF_TYPES = [ + test_df_columns = pa_table.schema.names + test_df_types = [ trino.pyarrow_to_trino_type(str(typ)) for typ in pa_table.schema.types ] query = trino._create_table_query( schema_name=TEST_SCHEMA, table_name=TEST_TABLE, - columns=TEST_DF_COLUMNS, - types=TEST_DF_TYPES, + columns=test_df_columns, + types=test_df_types, ) expected_query = f""" @@ -80,18 +78,17 @@ def test__create_table_query_basic(trino, DF): def test__create_table_query_partitions(trino, DF): """Test create table query is construed as expected when partitions are provided.""" - pa_table = pa.Table.from_pandas(DF) - TEST_DF_COLUMNS = pa_table.schema.names - TEST_DF_TYPES = [ + test_df_columns = pa_table.schema.names + test_df_types = [ trino.pyarrow_to_trino_type(str(typ)) for typ in pa_table.schema.types ] query = trino._create_table_query( schema_name=TEST_SCHEMA, table_name=TEST_TABLE, - columns=TEST_DF_COLUMNS, - types=TEST_DF_TYPES, + columns=test_df_columns, + types=test_df_types, partition_cols=["country"], ) @@ -107,18 +104,17 @@ def test__create_table_query_partitions(trino, DF): def test__create_table_query_full(trino, DF): """Test create table query is construed as expected when partitions are provided.""" - pa_table = pa.Table.from_pandas(DF) - TEST_DF_COLUMNS = pa_table.schema.names - TEST_DF_TYPES = [ + test_df_columns = pa_table.schema.names + test_df_types = [ trino.pyarrow_to_trino_type(str(typ)) for typ in pa_table.schema.types ] query = trino._create_table_query( schema_name=TEST_SCHEMA, table_name=TEST_TABLE, - columns=TEST_DF_COLUMNS, - types=TEST_DF_TYPES, + columns=test_df_columns, + types=test_df_types, format="ORC", partition_cols=["country"], sort_by_cols=["country"], diff --git a/tests/pytest.ini b/tests/pytest.ini index cfa251015..0ff420631 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -2,9 +2,16 @@ log_cli = True log_cli_level = WARNING addopts = --verbose -ra +markers = + 
basic: mark a test as basic call to the connector + connect: mark a test as the API connection + response: mark a test as the API response + functions: mark a test as an independent function filterwarnings = ignore::sqlalchemy.exc.SADeprecationWarning ignore::urllib3.exceptions.InsecureRequestWarning # Silence aiohttp warnings raised through aiobotocore in the MinIO source. ignore::DeprecationWarning:aiobotocore + ignore::RuntimeWarning:unittest + ignore::DeprecationWarning:jupyter_client diff --git a/tests/resources/metadata/.luma/config.yaml b/tests/resources/metadata/.luma/config.yaml new file mode 100644 index 000000000..3e81c7c3b --- /dev/null +++ b/tests/resources/metadata/.luma/config.yaml @@ -0,0 +1,11 @@ +groups: + - meta_key: "domains" + slug: "domains" + label_plural: "Domains" + label_singular: "Domain" + icon: "Cube" + - meta_key: "criticality" + slug: "criticality" + label_plural: "Criticality" + label_singular: "Criticality" + in_sidebar: false diff --git a/tests/resources/metadata/.luma/owners.yaml b/tests/resources/metadata/.luma/owners.yaml new file mode 100644 index 000000000..a949bde5f --- /dev/null +++ b/tests/resources/metadata/.luma/owners.yaml @@ -0,0 +1,9 @@ +owners: + - email: "user1@example.com" + first_name: "User" + last_name: "One" + title: "Senior Data Analyst" + - email: "user2@example.com" + first_name: "User" + last_name: "Two" + title: "Product Owner" diff --git a/tests/resources/metadata/model/catalog.json b/tests/resources/metadata/model/catalog.json new file mode 100644 index 000000000..c9e67db3d --- /dev/null +++ b/tests/resources/metadata/model/catalog.json @@ -0,0 +1,398 @@ +{ + "metadata": { + "dbt_schema_version": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "dbt_version": "1.7.10", + "generated_at": "2024-03-26T13:01:14.442740Z", + "invocation_id": "a0132c71-8d3b-43f9-bf6d-5f415cbeb4e9", + "env": {} + }, + "nodes": { + "model.my_nesso_project.account": { + "metadata": { + "type": "BASE TABLE", + "schema": "dbt", + "name": "account", + "database": "nesso", + "comment": null, + "owner": null + }, + "columns": { + "id": { + "type": "BIGINT", + "index": 1, + "name": "id", + "comment": null + }, + "name": { + "type": "VARCHAR", + "index": 2, + "name": "name", + "comment": null + }, + "email": { + "type": "VARCHAR", + "index": 3, + "name": "email", + "comment": null + }, + "mobile": { + "type": "VARCHAR", + "index": 4, + "name": "mobile", + "comment": null + }, + "country": { + "type": "VARCHAR", + "index": 5, + "name": "country", + "comment": null + }, + "_viadot_downloaded_at_utc": { + "type": "TIMESTAMP_NS", + "index": 6, + "name": "_viadot_downloaded_at_utc", + "comment": null + } + }, + "stats": { + "has_stats": { + "id": "has_stats", + "label": "Has Stats?", + "value": false, + "include": false, + "description": "Indicates whether there are statistics for this table" + } + }, + "unique_id": "model.my_nesso_project.account" + }, + "model.my_nesso_project.contact": { + "metadata": { + "type": "BASE TABLE", + "schema": "dbt", + "name": "contact", + "database": "nesso", + "comment": null, + "owner": null + }, + "columns": { + "id": { + "type": "BIGINT", + "index": 1, + "name": "id", + "comment": null + }, + "accountid": { + "type": "BIGINT", + "index": 2, + "name": "accountid", + "comment": null + }, + "firstname": { + "type": "VARCHAR", + "index": 3, + "name": "firstname", + "comment": null + }, + "lastname": { + "type": "VARCHAR", + "index": 4, + "name": "lastname", + "comment": null + }, + "contactemail": { + "type": "VARCHAR", + "index": 
5, + "name": "contactemail", + "comment": null + }, + "mailingcity": { + "type": "VARCHAR", + "index": 6, + "name": "mailingcity", + "comment": null + }, + "country": { + "type": "VARCHAR", + "index": 7, + "name": "country", + "comment": null + }, + "_viadot_downloaded_at_utc": { + "type": "TIMESTAMP_NS", + "index": 8, + "name": "_viadot_downloaded_at_utc", + "comment": null + } + }, + "stats": { + "has_stats": { + "id": "has_stats", + "label": "Has Stats?", + "value": false, + "include": false, + "description": "Indicates whether there are statistics for this table" + } + }, + "unique_id": "model.my_nesso_project.contact" + }, + "model.my_nesso_project.int_account": { + "metadata": { + "type": "VIEW", + "schema": "dbt", + "name": "int_account", + "database": "nesso", + "comment": null, + "owner": null + }, + "columns": { + "id": { + "type": "BIGINT", + "index": 1, + "name": "id", + "comment": null + }, + "name": { + "type": "VARCHAR", + "index": 2, + "name": "name", + "comment": null + }, + "email": { + "type": "VARCHAR", + "index": 3, + "name": "email", + "comment": null + }, + "mobile": { + "type": "VARCHAR", + "index": 4, + "name": "mobile", + "comment": null + }, + "country": { + "type": "VARCHAR", + "index": 5, + "name": "country", + "comment": null + }, + "_viadot_downloaded_at_utc": { + "type": "TIMESTAMP_NS", + "index": 6, + "name": "_viadot_downloaded_at_utc", + "comment": null + } + }, + "stats": { + "has_stats": { + "id": "has_stats", + "label": "Has Stats?", + "value": false, + "include": false, + "description": "Indicates whether there are statistics for this table" + } + }, + "unique_id": "model.my_nesso_project.int_account" + }, + "model.my_nesso_project.int_contact": { + "metadata": { + "type": "VIEW", + "schema": "dbt", + "name": "int_contact", + "database": "nesso", + "comment": null, + "owner": null + }, + "columns": { + "id": { + "type": "BIGINT", + "index": 1, + "name": "id", + "comment": null + }, + "accountid": { + "type": "BIGINT", + "index": 2, + "name": "accountid", + "comment": null + }, + "firstname": { + "type": "VARCHAR", + "index": 3, + "name": "firstname", + "comment": null + }, + "lastname": { + "type": "VARCHAR", + "index": 4, + "name": "lastname", + "comment": null + }, + "contactemail": { + "type": "VARCHAR", + "index": 5, + "name": "contactemail", + "comment": null + }, + "mailingcity": { + "type": "VARCHAR", + "index": 6, + "name": "mailingcity", + "comment": null + }, + "country": { + "type": "VARCHAR", + "index": 7, + "name": "country", + "comment": null + }, + "_viadot_downloaded_at_utc": { + "type": "TIMESTAMP_NS", + "index": 8, + "name": "_viadot_downloaded_at_utc", + "comment": null + } + }, + "stats": { + "has_stats": { + "id": "has_stats", + "label": "Has Stats?", + "value": false, + "include": false, + "description": "Indicates whether there are statistics for this table" + } + }, + "unique_id": "model.my_nesso_project.int_contact" + } + }, + "sources": { + "source.my_nesso_project.staging.account": { + "metadata": { + "type": "BASE TABLE", + "schema": "staging", + "name": "account", + "database": "nesso", + "comment": null, + "owner": null + }, + "columns": { + "id": { + "type": "BIGINT", + "index": 1, + "name": "id", + "comment": null + }, + "name": { + "type": "VARCHAR", + "index": 2, + "name": "name", + "comment": null + }, + "email": { + "type": "VARCHAR", + "index": 3, + "name": "email", + "comment": null + }, + "mobile": { + "type": "VARCHAR", + "index": 4, + "name": "mobile", + "comment": null + }, + "country": { + "type": "VARCHAR", 
+ "index": 5, + "name": "country", + "comment": null + }, + "_viadot_downloaded_at_utc": { + "type": "TIMESTAMP_NS", + "index": 6, + "name": "_viadot_downloaded_at_utc", + "comment": null + } + }, + "stats": { + "has_stats": { + "id": "has_stats", + "label": "Has Stats?", + "value": false, + "include": false, + "description": "Indicates whether there are statistics for this table" + } + }, + "unique_id": "source.my_nesso_project.staging.account" + }, + "source.my_nesso_project.staging.contact": { + "metadata": { + "type": "BASE TABLE", + "schema": "staging", + "name": "contact", + "database": "nesso", + "comment": null, + "owner": null + }, + "columns": { + "Id": { + "type": "BIGINT", + "index": 1, + "name": "Id", + "comment": null + }, + "AccountId": { + "type": "BIGINT", + "index": 2, + "name": "AccountId", + "comment": null + }, + "FirstName": { + "type": "VARCHAR", + "index": 3, + "name": "FirstName", + "comment": null + }, + "LastName": { + "type": "VARCHAR", + "index": 4, + "name": "LastName", + "comment": null + }, + "ContactEMail": { + "type": "VARCHAR", + "index": 5, + "name": "ContactEMail", + "comment": null + }, + "MailingCity": { + "type": "VARCHAR", + "index": 6, + "name": "MailingCity", + "comment": null + }, + "Country": { + "type": "VARCHAR", + "index": 7, + "name": "Country", + "comment": null + }, + "_viadot_downloaded_at_utc": { + "type": "TIMESTAMP_NS", + "index": 8, + "name": "_viadot_downloaded_at_utc", + "comment": null + } + }, + "stats": { + "has_stats": { + "id": "has_stats", + "label": "Has Stats?", + "value": false, + "include": false, + "description": "Indicates whether there are statistics for this table" + } + }, + "unique_id": "source.my_nesso_project.staging.contact" + } + }, + "errors": null +} diff --git a/tests/resources/metadata/model/manifest.json b/tests/resources/metadata/model/manifest.json new file mode 100644 index 000000000..60a008e67 --- /dev/null +++ b/tests/resources/metadata/model/manifest.json @@ -0,0 +1,21680 @@ +{ + "metadata": { + "dbt_schema_version": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "dbt_version": "1.7.10", + "generated_at": "2024-03-26T13:01:11.621552Z", + "invocation_id": "a0132c71-8d3b-43f9-bf6d-5f415cbeb4e9", + "env": {}, + "project_name": "my_nesso_project", + "project_id": "13718eaf7afb51e6735da63722f5953d", + "user_id": null, + "send_anonymous_usage_stats": true, + "adapter_type": "duckdb" + }, + "nodes": { + "model.my_nesso_project.int_contact": { + "database": "nesso", + "schema": "dbt", + "name": "int_contact", + "resource_type": "model", + "package_name": "my_nesso_project", + "path": "intermediate/int_contact/int_contact.sql", + "original_file_path": "models/intermediate/int_contact/int_contact.sql", + "unique_id": "model.my_nesso_project.int_contact", + "fqn": ["my_nesso_project", "intermediate", "int_contact", "int_contact"], + "alias": "int_contact", + "checksum": { + "name": "sha256", + "checksum": "1cea96f85a80fac8c205fb64693120e13a7805f285144a89af767f619e3529c1" + }, + "config": { + "enabled": true, + "alias": null, + "schema": "intermediate", + "database": null, + "tags": [], + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "None" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": [], + "true_source": [], + "SLA": "24 hours" + }, + "group": null, + "materialized": "view", + "incremental_strategy": "merge", + "persist_docs": {}, + "post-hook": [], + "pre-hook": [], + "quoting": {}, + "column_types": {}, + "full_refresh": null, + "unique_key": null, + 
"on_schema_change": "sync_all_columns", + "on_configuration_change": "apply", + "grants": {}, + "packages": [], + "docs": { + "show": true, + "node_color": null + }, + "contract": { + "enforced": false, + "alias_types": true + }, + "access": "protected" + }, + "tags": [], + "description": "Base model of the `contact` table.", + "columns": { + "id": { + "name": "id", + "description": "", + "meta": {}, + "data_type": "BIGINT", + "constraints": [], + "quote": true, + "tags": [] + }, + "accountid": { + "name": "accountid", + "description": "", + "meta": {}, + "data_type": "BIGINT", + "constraints": [], + "quote": true, + "tags": [] + }, + "firstname": { + "name": "firstname", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "lastname": { + "name": "lastname", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "contactemail": { + "name": "contactemail", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "mailingcity": { + "name": "mailingcity", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "country": { + "name": "country", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "_viadot_downloaded_at_utc": { + "name": "_viadot_downloaded_at_utc", + "description": "", + "meta": {}, + "data_type": "TIMESTAMP_NS", + "constraints": [], + "quote": true, + "tags": [] + } + }, + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "None" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": [], + "true_source": [], + "SLA": "24 hours" + }, + "group": null, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": "my_nesso_project://models/intermediate/int_contact/int_contact.yml", + "build_path": null, + "deferred": false, + "unrendered_config": { + "on_schema_change": "sync_all_columns", + "incremental_strategy": "merge", + "materialized": "view", + "schema": "intermediate", + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "None" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": [], + "true_source": [], + "SLA": "24 hours" + } + }, + "created_at": 1711458073.882494, + "relation_name": "\"nesso\".\"dbt\".\"int_contact\"", + "raw_code": "with _masked as (\n select\n \"Id\",\n \"AccountId\",\n \"FirstName\",\n \"LastName\",\n \"ContactEMail\",\n \"MailingCity\",\n \"Country\",\n \"_viadot_downloaded_at_utc\"\n from {{ source(\"staging\", \"contact\") }}\n),\n\nrenamed as (\n select\n \"Id\" as \"id\",\n \"AccountId\" as \"accountid\",\n \"FirstName\" as \"firstname\",\n \"LastName\" as \"lastname\",\n \"ContactEMail\" as \"contactemail\",\n \"MailingCity\" as \"mailingcity\",\n \"Country\" as \"country\",\n \"_viadot_downloaded_at_utc\" as \"_viadot_downloaded_at_utc\"\n\n from _masked\n)\n\nselect * from renamed", + "language": "sql", + "refs": [], + "sources": [["staging", "contact"]], + "metrics": [], + "depends_on": { + "macros": [], + "nodes": ["source.my_nesso_project.staging.contact"] + }, + "compiled_path": "target/compiled/my_nesso_project/models/intermediate/int_contact/int_contact.sql", + "compiled": true, + "compiled_code": "with _masked as (\n select\n \"Id\",\n \"AccountId\",\n 
\"FirstName\",\n \"LastName\",\n \"ContactEMail\",\n \"MailingCity\",\n \"Country\",\n \"_viadot_downloaded_at_utc\"\n from \"nesso\".\"staging\".\"contact\"\n),\n\nrenamed as (\n select\n \"Id\" as \"id\",\n \"AccountId\" as \"accountid\",\n \"FirstName\" as \"firstname\",\n \"LastName\" as \"lastname\",\n \"ContactEMail\" as \"contactemail\",\n \"MailingCity\" as \"mailingcity\",\n \"Country\" as \"country\",\n \"_viadot_downloaded_at_utc\" as \"_viadot_downloaded_at_utc\"\n\n from _masked\n)\n\nselect * from renamed", + "extra_ctes_injected": true, + "extra_ctes": [], + "contract": { + "enforced": false, + "alias_types": true, + "checksum": null + }, + "access": "protected", + "constraints": [], + "version": null, + "latest_version": null, + "deprecation_date": null + }, + "model.my_nesso_project.int_account": { + "database": "nesso", + "schema": "dbt", + "name": "int_account", + "resource_type": "model", + "package_name": "my_nesso_project", + "path": "intermediate/int_account/int_account.sql", + "original_file_path": "models/intermediate/int_account/int_account.sql", + "unique_id": "model.my_nesso_project.int_account", + "fqn": ["my_nesso_project", "intermediate", "int_account", "int_account"], + "alias": "int_account", + "checksum": { + "name": "sha256", + "checksum": "a7e1f1f4b00c3bb94c0d8d2446386a79155913cd6193b9d589ddc4254d845fbd" + }, + "config": { + "enabled": true, + "alias": null, + "schema": "intermediate", + "database": null, + "tags": [], + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "None" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": [], + "true_source": [], + "SLA": "24 hours" + }, + "group": null, + "materialized": "view", + "incremental_strategy": "merge", + "persist_docs": {}, + "post-hook": [], + "pre-hook": [], + "quoting": {}, + "column_types": {}, + "full_refresh": null, + "unique_key": null, + "on_schema_change": "sync_all_columns", + "on_configuration_change": "apply", + "grants": {}, + "packages": [], + "docs": { + "show": true, + "node_color": null + }, + "contract": { + "enforced": false, + "alias_types": true + }, + "access": "protected" + }, + "tags": [], + "description": "Base model of the `account` table.", + "columns": { + "id": { + "name": "id", + "description": "", + "meta": {}, + "data_type": "BIGINT", + "constraints": [], + "quote": true, + "tags": [] + }, + "name": { + "name": "name", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "email": { + "name": "email", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "mobile": { + "name": "mobile", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "country": { + "name": "country", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "_viadot_downloaded_at_utc": { + "name": "_viadot_downloaded_at_utc", + "description": "", + "meta": {}, + "data_type": "TIMESTAMP_NS", + "constraints": [], + "quote": true, + "tags": [] + } + }, + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "None" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": [], + "true_source": [], + "SLA": "24 hours" + }, + "group": null, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": 
"my_nesso_project://models/intermediate/int_account/int_account.yml", + "build_path": null, + "deferred": false, + "unrendered_config": { + "on_schema_change": "sync_all_columns", + "incremental_strategy": "merge", + "materialized": "view", + "schema": "intermediate", + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "None" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": [], + "true_source": [], + "SLA": "24 hours" + } + }, + "created_at": 1711458073.8853545, + "relation_name": "\"nesso\".\"dbt\".\"int_account\"", + "raw_code": "with _masked as (\n select\n \"id\",\n \"name\",\n \"email\",\n \"mobile\",\n \"country\",\n \"_viadot_downloaded_at_utc\"\n from {{ source(\"staging\", \"account\") }}\n),\n\nrenamed as (\n select\n \"id\" as \"id\",\n \"name\" as \"name\",\n \"email\" as \"email\",\n \"mobile\" as \"mobile\",\n \"country\" as \"country\",\n \"_viadot_downloaded_at_utc\" as \"_viadot_downloaded_at_utc\"\n\n from _masked\n)\n\nselect * from renamed", + "language": "sql", + "refs": [], + "sources": [["staging", "account"]], + "metrics": [], + "depends_on": { + "macros": [], + "nodes": ["source.my_nesso_project.staging.account"] + }, + "compiled_path": "target/compiled/my_nesso_project/models/intermediate/int_account/int_account.sql", + "compiled": true, + "compiled_code": "with _masked as (\n select\n \"id\",\n \"name\",\n \"email\",\n \"mobile\",\n \"country\",\n \"_viadot_downloaded_at_utc\"\n from \"nesso\".\"staging\".\"account\"\n),\n\nrenamed as (\n select\n \"id\" as \"id\",\n \"name\" as \"name\",\n \"email\" as \"email\",\n \"mobile\" as \"mobile\",\n \"country\" as \"country\",\n \"_viadot_downloaded_at_utc\" as \"_viadot_downloaded_at_utc\"\n\n from _masked\n)\n\nselect * from renamed", + "extra_ctes_injected": true, + "extra_ctes": [], + "contract": { + "enforced": false, + "alias_types": true, + "checksum": null + }, + "access": "protected", + "constraints": [], + "version": null, + "latest_version": null, + "deprecation_date": null + }, + "model.my_nesso_project.contact": { + "database": "nesso", + "schema": "dbt", + "name": "contact", + "resource_type": "model", + "package_name": "my_nesso_project", + "path": "marts/sales/contact/contact.sql", + "original_file_path": "models/marts/sales/contact/contact.sql", + "unique_id": "model.my_nesso_project.contact", + "fqn": ["my_nesso_project", "marts", "sales", "contact", "contact"], + "alias": "contact", + "checksum": { + "name": "sha256", + "checksum": "e36b3d2c0d12256b723b3d363505026450dfaea776234e6c988ecd2622cd7987" + }, + "config": { + "enabled": true, + "alias": null, + "schema": null, + "database": null, + "tags": [], + "meta": {}, + "group": null, + "materialized": "table", + "incremental_strategy": "merge", + "persist_docs": {}, + "post-hook": [], + "pre-hook": [], + "quoting": {}, + "column_types": {}, + "full_refresh": null, + "unique_key": null, + "on_schema_change": "sync_all_columns", + "on_configuration_change": "apply", + "grants": {}, + "packages": [], + "docs": { + "show": true, + "node_color": null + }, + "contract": { + "enforced": false, + "alias_types": true + }, + "access": "protected" + }, + "tags": [], + "description": "", + "columns": {}, + "meta": {}, + "group": null, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "build_path": null, + "deferred": false, + "unrendered_config": { + "on_schema_change": "sync_all_columns", + "incremental_strategy": "merge", + "materialized": "table" + }, + "created_at": 1711458073.6092288, 
+ "relation_name": "\"nesso\".\"dbt\".\"contact\"", + "raw_code": "select * from {{ ref('int_contact') }}", + "language": "sql", + "refs": [ + { + "name": "int_contact", + "package": null, + "version": null + } + ], + "sources": [], + "metrics": [], + "depends_on": { + "macros": [], + "nodes": ["model.my_nesso_project.int_contact"] + }, + "compiled_path": "target/compiled/my_nesso_project/models/marts/sales/contact/contact.sql", + "compiled": true, + "compiled_code": "select * from \"nesso\".\"dbt\".\"int_contact\"", + "extra_ctes_injected": true, + "extra_ctes": [], + "contract": { + "enforced": false, + "alias_types": true, + "checksum": null + }, + "access": "protected", + "constraints": [], + "version": null, + "latest_version": null, + "deprecation_date": null + }, + "model.my_nesso_project.account": { + "database": "nesso", + "schema": "dbt", + "name": "account", + "resource_type": "model", + "package_name": "my_nesso_project", + "path": "marts/sales/account/account.sql", + "original_file_path": "models/marts/sales/account/account.sql", + "unique_id": "model.my_nesso_project.account", + "fqn": ["my_nesso_project", "marts", "sales", "account", "account"], + "alias": "account", + "checksum": { + "name": "sha256", + "checksum": "1a428c2d7c3696496a9ed13bb0f44696052702c8c23bd52874bf7041e1d5c548" + }, + "config": { + "enabled": true, + "alias": null, + "schema": null, + "database": null, + "tags": [], + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "mzawadzki@dyvenia.com" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": ["TestDomain"], + "true_source": ["Manual"], + "SLA": "24 hours" + }, + "group": null, + "materialized": "table", + "incremental_strategy": "merge", + "persist_docs": {}, + "post-hook": [], + "pre-hook": [], + "quoting": {}, + "column_types": {}, + "full_refresh": null, + "unique_key": null, + "on_schema_change": "sync_all_columns", + "on_configuration_change": "apply", + "grants": {}, + "packages": [], + "docs": { + "show": true, + "node_color": null + }, + "contract": { + "enforced": false, + "alias_types": true + }, + "access": "protected" + }, + "tags": [], + "description": "Test account table.", + "columns": { + "id": { + "name": "id", + "description": "", + "meta": {}, + "data_type": "BIGINT", + "constraints": [], + "quote": true, + "tags": [] + }, + "name": { + "name": "name", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "email": { + "name": "email", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "mobile": { + "name": "mobile", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "country": { + "name": "country", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "_viadot_downloaded_at_utc": { + "name": "_viadot_downloaded_at_utc", + "description": "", + "meta": {}, + "data_type": "TIMESTAMP_NS", + "constraints": [], + "quote": true, + "tags": [] + } + }, + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "mzawadzki@dyvenia.com" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": ["TestDomain"], + "true_source": ["Manual"], + "SLA": "24 hours" + }, + "group": null, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": 
"my_nesso_project://models/marts/sales/account/account.yml", + "build_path": null, + "deferred": false, + "unrendered_config": { + "on_schema_change": "sync_all_columns", + "incremental_strategy": "merge", + "materialized": "table", + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "mzawadzki@dyvenia.com" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": ["TestDomain"], + "true_source": ["Manual"], + "SLA": "24 hours" + } + }, + "created_at": 1711458073.8889043, + "relation_name": "\"nesso\".\"dbt\".\"account\"", + "raw_code": "select * from {{ ref('int_account') }}", + "language": "sql", + "refs": [ + { + "name": "int_account", + "package": null, + "version": null + } + ], + "sources": [], + "metrics": [], + "depends_on": { + "macros": [], + "nodes": ["model.my_nesso_project.int_account"] + }, + "compiled_path": "target/compiled/my_nesso_project/models/marts/sales/account/account.sql", + "compiled": true, + "compiled_code": "select * from \"nesso\".\"dbt\".\"int_account\"", + "extra_ctes_injected": true, + "extra_ctes": [], + "contract": { + "enforced": false, + "alias_types": true, + "checksum": null + }, + "access": "protected", + "constraints": [], + "version": null, + "latest_version": null, + "deprecation_date": null + }, + "test.my_nesso_project.unique_account__id_.e2119e8bab": { + "test_metadata": { + "name": "unique", + "kwargs": { + "column_name": "\"id\"", + "model": "{{ get_where_subquery(ref('account')) }}" + }, + "namespace": null + }, + "database": "nesso", + "schema": "dbt", + "name": "unique_account__id_", + "resource_type": "test", + "package_name": "my_nesso_project", + "path": "unique_account__id_.sql", + "original_file_path": "models/marts/sales/account/account.yml", + "unique_id": "test.my_nesso_project.unique_account__id_.e2119e8bab", + "fqn": [ + "my_nesso_project", + "marts", + "sales", + "account", + "unique_account__id_" + ], + "alias": "unique_account__id_", + "checksum": { + "name": "none", + "checksum": "" + }, + "config": { + "enabled": true, + "alias": null, + "schema": "dbt_test__audit", + "database": null, + "tags": [], + "meta": {}, + "group": null, + "materialized": "test", + "severity": "ERROR", + "store_failures": null, + "store_failures_as": null, + "where": null, + "limit": null, + "fail_calc": "count(*)", + "warn_if": "!= 0", + "error_if": "!= 0" + }, + "tags": [], + "description": "", + "columns": {}, + "meta": {}, + "group": null, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "build_path": null, + "deferred": false, + "unrendered_config": {}, + "created_at": 1711458073.9644363, + "relation_name": null, + "raw_code": "{{ test_unique(**_dbt_generic_test_kwargs) }}", + "language": "sql", + "refs": [ + { + "name": "account", + "package": null, + "version": null + } + ], + "sources": [], + "metrics": [], + "depends_on": { + "macros": ["macro.dbt.test_unique", "macro.dbt.get_where_subquery"], + "nodes": ["model.my_nesso_project.account"] + }, + "compiled_path": "target/compiled/my_nesso_project/models/marts/sales/account/account.yml/unique_account__id_.sql", + "compiled": true, + "compiled_code": "\n \n \n\nselect\n \"id\" as unique_field,\n count(*) as n_records\n\nfrom \"nesso\".\"dbt\".\"account\"\nwhere \"id\" is not null\ngroup by \"id\"\nhaving count(*) > 1\n\n\n", + "extra_ctes_injected": true, + "extra_ctes": [], + "contract": { + "enforced": false, + "alias_types": true, + "checksum": null + }, + "column_name": "\"id\"", + "file_key_name": "models.account", + 
"attached_node": "model.my_nesso_project.account" + } + }, + "sources": { + "source.my_nesso_project.staging.contact": { + "database": "nesso", + "schema": "staging", + "name": "contact", + "resource_type": "source", + "package_name": "my_nesso_project", + "path": "models/sources/staging/staging.yml", + "original_file_path": "models/sources/staging/staging.yml", + "unique_id": "source.my_nesso_project.staging.contact", + "fqn": ["my_nesso_project", "sources", "staging", "staging", "contact"], + "source_name": "staging", + "source_description": "The input layer to the data modelling project.", + "loader": "", + "identifier": "contact", + "quoting": { + "database": null, + "schema": null, + "identifier": null, + "column": null + }, + "loaded_at_field": "_viadot_downloaded_at_utc::timestamp", + "freshness": { + "warn_after": { + "count": 24, + "period": "hour" + }, + "error_after": { + "count": 48, + "period": "hour" + }, + "filter": null + }, + "external": null, + "description": "## `contact` table\n\n### \ud83d\udcdd Details\n-\n\n### \ud83d\udcda External docs\n-", + "columns": { + "Id": { + "name": "Id", + "description": "", + "meta": {}, + "data_type": "BIGINT", + "constraints": [], + "quote": true, + "tags": [] + }, + "AccountId": { + "name": "AccountId", + "description": "", + "meta": {}, + "data_type": "BIGINT", + "constraints": [], + "quote": true, + "tags": [] + }, + "FirstName": { + "name": "FirstName", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "LastName": { + "name": "LastName", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "ContactEMail": { + "name": "ContactEMail", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "MailingCity": { + "name": "MailingCity", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "Country": { + "name": "Country", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "_viadot_downloaded_at_utc": { + "name": "_viadot_downloaded_at_utc", + "description": "", + "meta": {}, + "data_type": "TIMESTAMP_NS", + "constraints": [], + "quote": true, + "tags": [] + } + }, + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "None" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": [], + "true_source": [], + "SLA": "24 hours" + }, + "source_meta": {}, + "tags": [], + "config": { + "enabled": true + }, + "patch_path": null, + "unrendered_config": {}, + "relation_name": "\"nesso\".\"staging\".\"contact\"", + "created_at": 1711458073.9675865 + }, + "source.my_nesso_project.staging.account": { + "database": "nesso", + "schema": "staging", + "name": "account", + "resource_type": "source", + "package_name": "my_nesso_project", + "path": "models/sources/staging/staging.yml", + "original_file_path": "models/sources/staging/staging.yml", + "unique_id": "source.my_nesso_project.staging.account", + "fqn": ["my_nesso_project", "sources", "staging", "staging", "account"], + "source_name": "staging", + "source_description": "The input layer to the data modelling project.", + "loader": "", + "identifier": "account", + "quoting": { + "database": null, + "schema": null, + "identifier": null, + "column": null + }, + 
"loaded_at_field": "_viadot_downloaded_at_utc::timestamp", + "freshness": { + "warn_after": { + "count": 24, + "period": "hour" + }, + "error_after": { + "count": 48, + "period": "hour" + }, + "filter": null + }, + "external": null, + "description": "## `account` table\n\n### \ud83d\udcdd Details\n-\n\n### \ud83d\udcda External docs\n-", + "columns": { + "id": { + "name": "id", + "description": "", + "meta": {}, + "data_type": "BIGINT", + "constraints": [], + "quote": true, + "tags": [] + }, + "name": { + "name": "name", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "email": { + "name": "email", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "mobile": { + "name": "mobile", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "country": { + "name": "country", + "description": "", + "meta": {}, + "data_type": "CHARACTER VARYING(256)", + "constraints": [], + "quote": true, + "tags": [] + }, + "_viadot_downloaded_at_utc": { + "name": "_viadot_downloaded_at_utc", + "description": "", + "meta": {}, + "data_type": "TIMESTAMP_NS", + "constraints": [], + "quote": true, + "tags": [] + } + }, + "meta": { + "owners": [ + { + "type": "Technical owner", + "email": "None" + }, + { + "type": "Business owner", + "email": "None" + } + ], + "domains": [], + "true_source": [], + "SLA": "24 hours" + }, + "source_meta": {}, + "tags": [], + "config": { + "enabled": true + }, + "patch_path": null, + "unrendered_config": {}, + "relation_name": "\"nesso\".\"staging\".\"account\"", + "created_at": 1711458073.9678516 + } + }, + "macros": { + "macro.my_nesso_project.hash": { + "name": "hash", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/hash_column.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/hash_column.sql", + "unique_id": "macro.my_nesso_project.hash", + "macro_sql": "{%- macro hash(field) -%} {{ return(adapter.dispatch(\"hash\", \"dbt\")(field)) }} {%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.my_nesso_project.default__hash"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8041039, + "supported_languages": null + }, + "macro.my_nesso_project.default__hash": { + "name": "default__hash", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/hash_column.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/hash_column.sql", + "unique_id": "macro.my_nesso_project.default__hash", + "macro_sql": "{%- macro default__hash(field) -%}\n md5(cast({{ adapter.quote(field) }} as {{ api.Column.translate_type(\"string\") }}))\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8045254, + "supported_languages": null + }, + "macro.my_nesso_project.databricks__hash": { + "name": "databricks__hash", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/hash_column.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/hash_column.sql", + "unique_id": 
"macro.my_nesso_project.databricks__hash", + "macro_sql": "{%- macro databricks__hash(field) -%}\n sha2(cast({{ adapter.quote(field) }} as {{ api.Column.translate_type(\"string\") }}), 256)\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8048182, + "supported_languages": null + }, + "macro.my_nesso_project.sqlserver__hash": { + "name": "sqlserver__hash", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/hash_column.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/hash_column.sql", + "unique_id": "macro.my_nesso_project.sqlserver__hash", + "macro_sql": "{%- macro sqlserver__hash(field) -%}\n HASHBYTES(\n 'SHA2_256', cast({{ adapter.quote(field) }} as {{ api.Column.translate_type(\"string\") }})\n )\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8051467, + "supported_languages": null + }, + "macro.my_nesso_project.create_description_markdown": { + "name": "create_description_markdown", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/create_description_markdown.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/create_description_markdown.sql", + "unique_id": "macro.my_nesso_project.create_description_markdown", + "macro_sql": "{% macro create_description_markdown(relation_name=none, docs_name=none, schema=none) %}\n\n {% if docs_name is none %} {% set docs_name = schema + \"_\" + relation_name %} {% endif %}\n\n {% if execute %}\n {{ print(\"{% docs \" + docs_name + \" %}\") }}\n {{ print(\"## `\" + relation_name + \"` table\") }}\n\n {{ print(\"\") }}\n\n {{ print(\"### \ud83d\udcdd Details\") }}\n {{ print(\"-\") }}\n\n {{ print(\"\") }}\n\n {{ print(\"### \ud83d\udcda External docs\") }}\n {{ print(\"-\") }}\n {{ print(\"{% enddocs %}\") }}\n {%- endif -%}\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.806953, + "supported_languages": null + }, + "macro.my_nesso_project.print_profile_docs": { + "name": "print_profile_docs", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "unique_id": "macro.my_nesso_project.print_profile_docs", + "macro_sql": "{% macro print_profile_docs(\n relation=none,\n relation_name=none,\n docs_name=none,\n schema=none,\n database=none,\n exclude_measures=[],\n include_columns=[],\n exclude_columns=[],\n max_rows=none,\n max_columns=13,\n max_column_width=30,\n max_precision=none\n) %}\n {%- set results = get_profile_table(\n relation=relation,\n relation_name=relation_name,\n schema=schema,\n database=database,\n exclude_measures=exclude_measures,\n include_columns=include_columns,\n exclude_columns=exclude_columns,\n ) -%}\n\n {% if docs_name is none %} {% set docs_name = schema + \"_\" + relation_name %} {% endif %}\n\n {% if execute %}\n {{ print(\"{% docs \" + docs_name + \" %}\") }}\n {{ print(\"## `\" + relation_name + \"` table\") }}\n\n {{ print(\"\") 
}}\n\n {{ print(\"### \ud83d\udcdd Details\") }}\n {{ print(\"-\") }}\n\n {{ print(\"\") }}\n\n {{ print(\"### \ud83d\udcca Profiling\") }}\n {% do results.print_table(\n max_rows=max_rows,\n max_columns=max_columns,\n max_column_width=max_column_width,\n max_precision=max_precision,\n ) %}\n\n {{ print(\"\") }}\n\n {{ print(\"### \ud83d\udcda External docs\") }}\n {{ print(\"-\") }}\n {{ print(\"{% enddocs %}\") }}\n {%- endif -%}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": ["macro.my_nesso_project.get_profile_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8356276, + "supported_languages": null + }, + "macro.my_nesso_project.get_profile_table": { + "name": "get_profile_table", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "unique_id": "macro.my_nesso_project.get_profile_table", + "macro_sql": "{% macro get_profile_table(\n relation=none,\n relation_name=none,\n schema=none,\n database=none,\n exclude_measures=[],\n include_columns=[],\n exclude_columns=[]\n) %}\n\n {%- set relation = dbt_profiler.get_relation(\n relation=relation, relation_name=relation_name, schema=schema, database=database\n ) -%}\n {%- set profile_sql = get_profile(\n relation=relation,\n exclude_measures=exclude_measures,\n include_columns=include_columns,\n exclude_columns=exclude_columns,\n ) -%}\n {{ log(profile_sql, info=False) }}\n {% set results = run_query(profile_sql) %}\n {% set results = results.rename(results.column_names | map(\"lower\")) %}\n {% do return(results) %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.get_relation", + "macro.my_nesso_project.get_profile", + "macro.dbt.run_query" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8369079, + "supported_languages": null + }, + "macro.my_nesso_project.databricks__type_string": { + "name": "databricks__type_string", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "unique_id": "macro.my_nesso_project.databricks__type_string", + "macro_sql": "\n\n\n{%- macro databricks__type_string() -%} string {%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.837046, + "supported_languages": null + }, + "macro.my_nesso_project.get_profile": { + "name": "get_profile", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "unique_id": "macro.my_nesso_project.get_profile", + "macro_sql": "{% macro get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[]) %}\n {{\n return(\n adapter.dispatch(\"get_profile\", macro_namespace=\"dbt_profiler\")(\n relation, exclude_measures, include_columns, exclude_columns\n )\n )\n }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.my_nesso_project.default__get_profile"] + }, + "description": "", + 
"meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8375778, + "supported_languages": null + }, + "macro.my_nesso_project.default__get_profile": { + "name": "default__get_profile", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "unique_id": "macro.my_nesso_project.default__get_profile", + "macro_sql": "{% macro default__get_profile(\n relation, exclude_measures=[], include_columns=[], exclude_columns=[]\n) %}\n\n {%- if include_columns and exclude_columns -%}\n {{\n exceptions.raise_compiler_error(\n \"Both include_columns and exclude_columns arguments were provided to the `get_profile` macro. Only one is allowed.\"\n )\n }}\n {%- endif -%}\n\n {%- set all_measures = [\n \"row_count\",\n \"not_null_proportion\",\n \"distinct_proportion\",\n \"distinct_count\",\n \"is_unique\",\n \"min\",\n \"max\",\n \"avg\",\n \"std_dev_population\",\n \"std_dev_sample\",\n ] -%}\n\n {%- set include_measures = all_measures | reject(\"in\", exclude_measures) -%}\n\n {{ log(\"Include measures: \" ~ include_measures, info=False) }}\n\n {% if execute %}\n {% do dbt_profiler.assert_relation_exists(relation) %}\n\n {{ log(\"Get columns in relation %s\" | format(relation.include()), info=False) }}\n {%- set relation_columns = adapter.get_columns_in_relation(relation) -%}\n {%- set relation_column_names = relation_columns | map(attribute=\"name\") | list -%}\n {{ log(\"Relation columns: \" ~ relation_column_names | join(\", \"), info=False) }}\n\n {%- if include_columns -%}\n {%- set profile_column_names = (\n relation_column_names | select(\"in\", include_columns) | list\n ) -%}\n {%- elif exclude_columns -%}\n {%- set profile_column_names = (\n relation_column_names | reject(\"in\", exclude_columns) | list\n ) -%}\n {%- else -%} {%- set profile_column_names = relation_column_names -%}\n {%- endif -%}\n\n {{ log(\"Profile columns: \" ~ profile_column_names | join(\", \"), info=False) }}\n\n {% set information_schema_columns = run_query(\n dbt_profiler.select_from_information_schema_columns(relation)\n ) %}\n {% set information_schema_columns = information_schema_columns.rename(\n information_schema_columns.column_names | map(\"lower\")\n ) %}\n {% set information_schema_data_types = (\n information_schema_columns.columns[\"data_type\"].values() | map(\"lower\") | list\n ) %}\n {% set information_schema_column_names = (\n information_schema_columns.columns[\"column_name\"].values()\n | map(\"lower\")\n | list\n ) %}\n {% set data_type_map = {} %}\n {% for column_name in information_schema_column_names %}\n {% do data_type_map.update(\n {column_name: information_schema_data_types[loop.index - 1]}\n ) %}\n {% endfor %}\n {{ log(\"Column data types: \" ~ data_type_map, info=False) }}\n\n {% set profile_sql %}\n with source_data as (\n select\n *\n from {{ relation }}\n ),\n\n column_profiles as (\n {% for column_name in profile_column_names %}\n {% set data_type = data_type_map.get(column_name.lower(), \"\") %}\n select \n lower('{{ column_name }}') as column_name,\n nullif(lower('{{ data_type }}'), '') as data_type,\n {% if \"row_count\" not in exclude_measures -%}\n cast(count(*) as numeric) as row_count,\n {%- endif %}\n {% if \"not_null_proportion\" not in exclude_measures -%}\n sum(case when {{ adapter.quote(column_name) }} is null then 0 else 1 end) / cast(count(*) as 
numeric) as not_null_proportion,\n {%- endif %}\n {% if \"distinct_proportion\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) / cast(count(*) as numeric) as distinct_proportion,\n {%- endif %}\n {% if \"distinct_count\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) as distinct_count,\n {%- endif %}\n {% if \"is_unique\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) = count(*) as is_unique,\n {%- endif %}\n {% if \"min\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) or dbt_profiler.is_date_or_time_dtype(data_type) %}cast(min({{ adapter.quote(column_name) }}) as {{ dbt_profiler.type_string() }}){% else %}null{% endif %} as min,\n {%- endif %}\n {% if \"max\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) or dbt_profiler.is_date_or_time_dtype(data_type) %}cast(max({{ adapter.quote(column_name) }}) as {{ dbt_profiler.type_string() }}){% else %}null{% endif %} as max,\n {%- endif %}\n {% if \"avg\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}avg({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as avg,\n {%- endif %}\n {% if \"std_dev_population\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}stddev_pop({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as std_dev_population,\n {%- endif %}\n {% if \"std_dev_sample\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}stddev_samp({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as std_dev_sample,\n {%- endif %}\n cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at,\n {{ loop.index }} as _column_position\n from source_data\n\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n\n select\n column_name,\n data_type,\n {% for measure in include_measures %}\n {{ measure }},\n {% endfor %}\n profiled_at\n from column_profiles\n order by _column_position asc\n {% endset %}\n\n {% do return(profile_sql) %}\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.assert_relation_exists", + "macro.dbt.run_query", + "macro.dbt_profiler.select_from_information_schema_columns", + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_date_or_time_dtype", + "macro.dbt_profiler.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8446453, + "supported_languages": null + }, + "macro.my_nesso_project.databricks__get_profile": { + "name": "databricks__get_profile", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/dbt_profiler.sql", + "unique_id": "macro.my_nesso_project.databricks__get_profile", + "macro_sql": "{% macro databricks__get_profile(\n relation, exclude_measures=[], include_columns=[], exclude_columns=[]\n) %}\n\n {%- if include_columns and exclude_columns -%}\n {{\n exceptions.raise_compiler_error(\n \"Both include_columns and exclude_columns arguments were provided to the `get_profile` macro. 
Only one is allowed.\"\n )\n }}\n {%- endif -%}\n\n {%- set all_measures = [\n \"row_count\",\n \"not_null_proportion\",\n \"distinct_proportion\",\n \"distinct_count\",\n \"is_unique\",\n \"min\",\n \"max\",\n \"avg\",\n \"std_dev_population\",\n \"std_dev_sample\",\n ] -%}\n\n {%- set include_measures = all_measures | reject(\"in\", exclude_measures) -%}\n\n {{ log(\"Include measures: \" ~ include_measures, info=False) }}\n\n {% if execute %}\n {% do dbt_profiler.assert_relation_exists(relation) %}\n\n {{ log(\"Get columns in relation %s\" | format(relation.include()), info=False) }}\n {%- set relation_columns = adapter.get_columns_in_relation(relation) -%}\n {%- set relation_column_names = relation_columns | map(attribute=\"name\") | list -%}\n {{ log(\"Relation columns: \" ~ relation_column_names | join(\", \"), info=False) }}\n\n {%- if include_columns -%}\n {%- set profile_column_names = (\n relation_column_names | select(\"in\", include_columns) | list\n ) -%}\n {%- elif exclude_columns -%}\n {%- set profile_column_names = (\n relation_column_names | reject(\"in\", exclude_columns) | list\n ) -%}\n {%- else -%} {%- set profile_column_names = relation_column_names -%}\n {%- endif -%}\n\n {{ log(\"Profile columns: \" ~ profile_column_names | join(\", \"), info=False) }}\n\n {# Get column metadata. #}\n {% call statement(\"table_metadata\", fetch_result=True) -%}\n describe table extended {{ relation.schema }}.{{ relation.identifier }}\n {% endcall %}\n {% set columns_metadata = load_result('table_metadata').table %}\n {% set columns_metadata = columns_metadata.rename(columns_metadata.column_names | map('lower')) %}\n\n {% set data_types = columns_metadata.columns['data_type'].values() | map('lower') | list %}\n {% set column_names = columns_metadata.columns['col_name'].values() | map('lower') | list %}\n {% set data_type_map = {} %}\n {% for column_name in column_names %}\n {% do data_type_map.update({column_name: data_types[loop.index-1]}) %}\n {% endfor %}\n {{ log(\"Column data types: \" ~ data_type_map, info=False) }}\n\n {% set profile_sql %}\n with source_data as (\n select\n *\n from {{ relation }}\n ),\n\n column_profiles as (\n {% for column_name in profile_column_names %}\n {% set data_type = data_type_map.get(column_name.lower(), \"\") %}\n select \n lower('{{ column_name }}') as column_name,\n nullif(lower('{{ data_type }}'), '') as data_type,\n {% if \"row_count\" not in exclude_measures -%}\n cast(count(*) as numeric) as row_count,\n {%- endif %}\n {% if \"not_null_proportion\" not in exclude_measures -%}\n sum(case when {{ adapter.quote(column_name) }} is null then 0 else 1 end) / cast(count(*) as numeric) as not_null_proportion,\n {%- endif %}\n {% if \"distinct_proportion\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) / cast(count(*) as numeric) as distinct_proportion,\n {%- endif %}\n {% if \"distinct_count\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) as distinct_count,\n {%- endif %}\n {% if \"is_unique\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) = count(*) as is_unique,\n {%- endif %}\n {% if \"min\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) or dbt_profiler.is_date_or_time_dtype(data_type) %}cast(min({{ adapter.quote(column_name) }}) as {{ dbt_profiler.type_string() }}){% else %}null{% endif %} as min,\n {%- endif %}\n {% if \"max\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) or 
dbt_profiler.is_date_or_time_dtype(data_type) %}cast(max({{ adapter.quote(column_name) }}) as {{ dbt_profiler.type_string() }}){% else %}null{% endif %} as max,\n {%- endif %}\n {% if \"avg\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}avg({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as avg,\n {%- endif %}\n {% if \"std_dev_population\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}stddev_pop({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as std_dev_population,\n {%- endif %}\n {% if \"std_dev_sample\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}stddev_samp({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as std_dev_sample,\n {%- endif %}\n cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at,\n {{ loop.index }} as _column_position\n from source_data\n\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n\n select\n column_name,\n data_type,\n {% for measure in include_measures %}\n {{ measure }},\n {% endfor %}\n profiled_at\n from column_profiles\n order by _column_position asc\n {% endset %}\n\n {# {{ print(profile_sql) }} #}\n {% do return(profile_sql) %}\n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.assert_relation_exists", + "macro.dbt.statement", + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_date_or_time_dtype", + "macro.dbt_profiler.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8519063, + "supported_languages": null + }, + "macro.my_nesso_project.generate_column_yaml": { + "name": "generate_column_yaml", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/generate_model_yaml_boilerplate.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/generate_model_yaml_boilerplate.sql", + "unique_id": "macro.my_nesso_project.generate_column_yaml", + "macro_sql": "{% macro generate_column_yaml(\n column,\n model_yaml,\n columns_metadata_dict,\n parent_column_name=\"\",\n include_pii_tag=True,\n include_data_types=True,\n snakecase_columns=True\n) %}\n {{ log(\"Generating YAML for column '\" ~ column.name ~ \"'...\") }}\n {% if parent_column_name %} {% set column_name = parent_column_name ~ \".\" ~ column.name %}\n {% else %} {% set column_name = column.name %}\n {% endif %}\n\n {% set column_metadata_dict = columns_metadata_dict.get(column.name, {}) %}\n {% if include_pii_tag %} {% set tags = column_metadata_dict.get(\"tags\", []) %}\n {% else %}\n {% set tags = column_metadata_dict.get(\"tags\", []) | reject(\"equalto\", \"PII\") | list %}\n {% endif %}\n\n {% if snakecase_columns %}\n {% do model_yaml.append(\" - name: \" ~ adapter.quote(snake_case(column.name))) %}\n {% else %} {% do model_yaml.append(\" - name: \" ~ adapter.quote(column.name)) %}\n {% endif %}\n {% do model_yaml.append(\" quote: true\") %}\n {% if include_data_types %}\n {% do model_yaml.append(\n \" data_type: \" ~ (column.data_type | upper)\n ) %}\n {% endif %}\n {% do model_yaml.append(\n ' description: \"' ~ column_metadata_dict.get(\"description\", \"\") ~ '\"'\n ) %}\n {% do model_yaml.append(\" # tests:\") %}\n {% do model_yaml.append(\" # - unique\") %}\n {% do model_yaml.append(\" # - not_null\") %}\n {% do model_yaml.append(\" tags: \" 
~ tags) %}\n {% do model_yaml.append(\"\") %}\n\n {% if column.fields | length > 0 %}\n {% for child_column in column.fields %}\n {% set model_yaml = generate_column_yaml(\n child_column,\n model_yaml,\n column_metadata_dict,\n parent_column_name=column_name,\n ) %}\n {% endfor %}\n {% endif %}\n {% do return(model_yaml) %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.my_nesso_project.snake_case"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.859862, + "supported_languages": null + }, + "macro.my_nesso_project.generate_model_yaml": { + "name": "generate_model_yaml", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/generate_model_yaml_boilerplate.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/generate_model_yaml_boilerplate.sql", + "unique_id": "macro.my_nesso_project.generate_model_yaml", + "macro_sql": "{% macro generate_model_yaml(\n model_name,\n technical_owner=\"None\",\n business_owner=\"None\",\n domains=[],\n source_systems=[],\n tags=[],\n upstream_metadata=True,\n include_sla=True,\n include_pii_tag=False,\n include_data_types=True,\n snakecase_columns=True,\n base_model_prefix=none,\n bootstrapped_base_model=False\n) %}\n {# \nGenerate model YAML template.\n\nArgs:\n model_name (str): The name of the model for which to generate the template.\n technical_owner (str, optional): The technical owner of the model.\n business_owner (str, optional): The business owner of the model.\n domains (List[str]): The domains the model belongs to.\n source_systems (List[str]): Sources from which the table originates, e.g., SQL Server, BigQuery, etc.\n tags (List[str]): The tags to attach to the model.\n upstream_metadata (bool, optional): Whether to inherit upstream model metadata.\n include_sla (bool, optional): Whether to include the SLA meta key.\n include_pii_tag (bool, optional): Whether to include the PII tag.\n include_data_types (bool, optional): Whether to include the data types of column.\n This may be useful when PII columns are already masked in the base model.\n snakecase_columns (bool, optional): Whether to standardize upstream column names\n to snakecase in the model.\n base_model_prefix (str, optional): Prefix to apply to the name of the base model.\n Defaults to empty string (no prefix).\n bootstrapped_base_model (bool, optional): Determines whether the base model was built using \n the `base_model bootstrap` command.\n#} \n \n {# Set to True to enable debugging. 
#}\n {% set info=False %}\n\n {{\n log(\n \"generate_model_yaml | Generating model YAML for model '\"\n ~ model_name\n ~ \"'...\",\n info=info\n )\n }}\n\n {% if upstream_metadata %}\n {% set upstream_model_metadata = get_parent_source_or_model_metadata(model_name) %}\n {{\n log(\n \"generate_model_yaml | Got upstream model metadata:\\n\\n\"\n ~ upstream_model_metadata\n ~ \"\\n\",\n info=info\n )\n }}\n {# {% set metadata_resolved = resolve_upstream_metadata(upstream_models_metadata) %}\n {{\n log(\n \"generate_model_yaml() | Resolved upstream metadata: \\n\\n\"\n ~ metadata_resolved\n ~ \"\\n\",\n info=info\n )\n }} #}\n {% else %}\n {# {% set metadata_resolved = {} %} #}\n {% set upstream_model_metadata = {} %}\n {% endif %}\n \n \n {% set dependencies = get_model_dependencies(model_name) %}\n {% set upstream_model_type = dependencies[\"type\"] %}\n\n {% if base_model_prefix is none %}\n {% set base_model_prefix = \"\" %}\n {% else %}\n {% if base_model_prefix and not base_model_prefix.endswith(\"_\") %}\n {% set base_model_prefix = base_model_prefix ~ \"_\" %}\n {% endif %}\n {% set model_name = base_model_prefix ~ model_name %}\n {% endif %}\n\n {{ log(\"generate_model_yaml | Base model prefix: \" ~ base_model_prefix, info=info) }}\n\n {# Table metadata. #}\n {% set model_yaml = [] %}\n {% do model_yaml.append(\"version: 2\") %}\n {% do model_yaml.append(\"\") %}\n {% do model_yaml.append(\"models:\") %}\n\n {% do model_yaml.append(\" - name: \" ~ model_name | lower) %}\n\n {% if upstream_model_type == \"source\" %}\n {% do model_yaml.append(\" description: Base model of the `\" ~ model_name | replace(base_model_prefix, \"\") ~ \"` table.\") %}\n {% else %} {% do model_yaml.append(' description: \"\"') %}\n {% endif %}\n\n {# {% set tags = metadata_resolved.get(\"tags\", tags) %}\n\n {% if tags %}\n {% do model_yaml.append(' config:')%}\n {% do model_yaml.append(' tags: ' ~ tags)%}\n {% endif %} #}\n\n {{ log(\"generate_model_yaml | Adding meta key...\", info=info) }}\n\n {% do model_yaml.append(\" meta:\") %}\n {% if upstream_model_metadata %}\n {% set meta = upstream_model_metadata.get(\"meta\", {}) %}\n {# {% set meta = metadata_resolved.get(\"meta\", {}) %} #}\n {% else %} {% set meta = {} %}\n {% endif %}\n\n {# Extract owners from metadata. #}\n {# Jinja forgets variables defined in loops -- but it has a concept of namespace as a workaround. 
#}\n {% set ns = namespace(technical_owner=technical_owner, business_owner=business_owner) %} \n\n {{ log(\"generate_model_yaml | Getting owner metadata...\", info=info) }}\n\n {% if (technical_owner == \"None\" or business_owner == \"None\") and meta %}\n\n {% for owner_meta in meta.get(\"owners\") %}\n {% set typ = owner_meta.get(\"type\") %}\n {% set email = owner_meta.get(\"email\") %}\n\n {% if typ == \"Technical owner\" %}\n {# {{ print(\"Setting technical owner to \" ~ email)}} #}\n {% if not technical_owner or technical_owner == \"None\" %}\n {% set ns.technical_owner = email %}\n {% endif %}\n {% elif typ == \"Business owner\" %}\n {# {{ print(\"Setting business owner to \" ~ email)}} #}\n {% if not business_owner or business_owner == \"None\" %}\n {% set ns.business_owner = email %}\n {% endif %}\n {% endif %}\n\n {% endfor %}\n {% endif %}\n\n {% do model_yaml.append(\" owners:\") %}\n {% do model_yaml.append(\" - type: Technical owner\") %}\n {% do model_yaml.append(\" email: \" ~ ns.technical_owner) %}\n {% do model_yaml.append(\" - type: Business owner\") %}\n {% do model_yaml.append(\" email: \" ~ ns.business_owner) %}\n {% do model_yaml.append(\" domains: \" ~ meta.get(\"domains\", domains)) %}\n {% do model_yaml.append(\" true_source: \" ~ meta.get(\"true_source\", source_systems)) %}\n\n {% if include_sla %}\n {% do model_yaml.append(\" SLA: \" ~ meta.get(\"SLA\", \"24 hours\")) %}\n {% endif %}\n\n {{ log(\"generate_model_yaml | Meta key added.\", info=info) }}\n\n {% do model_yaml.append(\" columns:\") %}\n\n {# Separates base models created using bootstrap command\n because they can have multiple parent sources and models. #}\n {% if upstream_model_type == \"source\" and not bootstrapped_base_model %}\n {% set schema = dependencies[\"node\"].split(\".\")[-2] %}\n {% set relation = source(schema, model_name | replace(base_model_prefix, \"\")) %}\n {% else %} {% set relation = ref(model_name) %}\n {% endif %}\n\n {{ log(\"generate_model_yaml| Retrieving the list of columns...\", info=info) }}\n\n {%- set columns = adapter.get_columns_in_relation(relation) -%}\n\n {# Column metadata. 
#}\n {% if meta %}\n {{ log(\"generate_model_yaml | Retrieving column metadata...\", info=info) }}\n {% set columns_metadata_dict = (\n get_parent_source_or_model_column_metadata(\n model_name | replace(base_model_prefix, \"\")\n )\n if upstream_metadata\n else {}\n ) %}\n {{\n log(\n \"generate_model_yaml | Successfully retrieved column metadata:\\n\"\n ~ columns_metadata_dict,\n info=info\n )\n }}\n {% else %} {% set columns_metadata_dict = {} %}\n {% endif %}\n\n {{ log(\"generate_model_yaml | Generating column YAML...\", info=info) }}\n {% for column in columns %}\n {{ \n log(\n \"generate_model_yaml() | Generating YAML for column: \"\n ~ column,\n info=info\n )\n }}\n {% set model_yaml = generate_column_yaml(\n column,\n model_yaml,\n columns_metadata_dict,\n include_data_types=include_data_types,\n include_pii_tag=False,\n snakecase_columns=True,\n ) %}\n {{ log(\"generate_model_yaml() | Generated YAML: \" ~ model_yaml, info=info) }}\n {% endfor %}\n {{ log(\"generate_model_yaml | Successfully generated column YAML.\", info=info) }}\n \n {%- if execute -%}\n\n {%- set joined = model_yaml | join(\"\\n\") -%}\n\n {{ print(joined) }}\n {{ log(\"generate_model_yaml() | Final metadata:\\n\\n\" ~ joined, info=info) }}\n\n {%- do return(joined) -%}\n\n {%- endif -%}\n\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [ + "macro.my_nesso_project.get_parent_source_or_model_metadata", + "macro.my_nesso_project.get_model_dependencies", + "macro.my_nesso_project.get_parent_source_or_model_column_metadata", + "macro.my_nesso_project.generate_column_yaml" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8682132, + "supported_languages": null + }, + "macro.my_nesso_project.resolve_upstream_metadata": { + "name": "resolve_upstream_metadata", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/generate_model_yaml_boilerplate.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/generate_model_yaml_boilerplate.sql", + "unique_id": "macro.my_nesso_project.resolve_upstream_metadata", + "macro_sql": "{% macro resolve_upstream_metadata(metadata) %}\n+ {# Set to True to enable logging to console #}\n+ {% set info = False %}\n+ {# \n+ Merge upstream metadata using the following logic:\n+ - fields of type string are taken from the first model in the list\n+ - fields of type list are merged together\n+ - for dict fields, same rules are applied to their subfields\n+ #}\n+\n+ {{ log(\"resolve_upstream_metadata() | Got metadata:\\n\\n\" ~ metadata ~ \"\\n\", info=info) }}\n+\n+ {% set metadata_resolved = {} %}\n+ {% for model_name in metadata %}\n+ {{ log(\"resolve_upstream_metadata() | Processing model '\" ~ model_name ~ \"'...\", info=info) }}\n+ {% set model_metadata = metadata[model_name] %}\n+\n+ {{ log(\"resolve_upstream_metadata() | Got model metadata: \\n\\n\" ~ model_metadata ~ \"\\n\", info=info) }}\n+\n+ {% for field in model_metadata %}\n+ {# Workaround because dbt jinja doesn't have the `continue` loop control. #}\n+ {% set continue_tracker = namespace(should_continue = True) %}\n+ {% set field_content = model_metadata[field] %}\n+ {% if field not in metadata_resolved %}\n+ {% do metadata_resolved.update({field: field_content}) %}\n+ {% else %}\n+ {% if field_content is string %}\n+ {# String - keep the value from the first encountered upstream,\n+ as there's no way to decide which is the correct one. 
#}\n+\n+ {{ log(\"resolve_upstream_metadata() | String field found: \" ~ field ~ \": \" ~ field_content, info=info) }}\n+ \n+ {% set continue_tracker.should_continue = False %}\n+ {% elif field_content is mapping and continue_tracker.should_continue %}\n+ {# A dictionary - merge the keys. #}\n+\n+ {{ log(\"resolve_upstream_metadata() | Dict field found: \" ~ field ~ \": \" ~ field_content, info=info) }}\n+\n+ {% for subfield in field_content %}\n+ {% set subfield_content = field_content[subfield] %}\n+ {% set continue_tracker2 = namespace(should_continue = True) %}\n+ {# Each key in the dictionary can also be a string, \n+ list, or dict. We apply the same rules as to top-level fields.#}\n+ {% if subfield_content is string %}\n+ {% set continue_tracker2.should_continue = False %}\n+ {% elif subfield_content is mapping and continue_tracker2.should_continue %}\n+ {% do metadata_resolved[field].update({subfield: subfield_content}) %}\n+ {% elif subfield_content is iterable and continue_tracker2.should_continue %}\n+ {% for key in subfield_content %}\n+ {% if key not in metadata_resolved[field][subfield] %}\n+ {% do metadata_resolved[field][subfield].append(key) %}\n+ {% endif %}\n+ {% endfor %}\n+ {% else %}\n+ {% do metadata_resolved[field].update({subfield: model_metadata[field]}) %} \n+ {% endif %}\n+ {% endfor %}\n+ {% elif field_content is iterable and continue_tracker.should_continue %}\n+ {# A list - append all unique items into the final list. #}\n+ \n+ {{ log(\"resolve_upstream_metadata() | List field found: \" ~ field ~ \": \" ~ field_content, info=info) }}\n+\n+ {% for key in field_content %}\n+ {% if key not in metadata_resolved[field] %}\n+ {% do metadata_resolved[field].append(key) %}\n+ {% endif %}\n+ {% endfor %}\n+ {% else %}\n+ {% do metadata_resolved.update({field: model_metadata[field]}) %} \n+ {% endif %}\n+ {% endif %}\n+ {% endfor %}\n+ {% endfor %}\n+\n+ {{ log(\"resolve_upstream_metadata() | Resolved metadata:\\n\\n\" ~ metadata_resolved ~ \"\\n\", info=info) }}\n+\n+ {% do return(metadata_resolved) %}\n+\n+{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8731985, + "supported_languages": null + }, + "macro.my_nesso_project.get_tables_in_schema": { + "name": "get_tables_in_schema", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/generate_source_yaml_boilerplate.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/generate_source_yaml_boilerplate.sql", + "unique_id": "macro.my_nesso_project.get_tables_in_schema", + "macro_sql": "{% macro get_tables_in_schema(\n schema_name,\n database_name=target.database,\n table_pattern=\"%\",\n exclude=\"\",\n print_result=False\n) %}\n\n {% set tables = dbt_utils.get_relations_by_pattern(\n schema_pattern=schema_name,\n database=database_name,\n table_pattern=table_pattern,\n exclude=exclude,\n ) %}\n\n {% set table_list = tables | map(attribute=\"identifier\") %}\n\n {% if print_result %} {{ print(table_list | join(\",\")) }} {% endif %}\n\n {{ return(table_list | sort) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.get_relations_by_pattern"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.877712, + "supported_languages": null + }, + 
"macro.my_nesso_project.generate_source": { + "name": "generate_source", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/generate_source_yaml_boilerplate.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/generate_source_yaml_boilerplate.sql", + "unique_id": "macro.my_nesso_project.generate_source", + "macro_sql": "{% macro generate_source(\n schema_name,\n technical_owner=none,\n business_owner=none,\n domains=[],\n source_systems=[],\n database_name=target.database,\n generate_columns=True,\n include_descriptions=True,\n include_data_types=True,\n include_table_profiling=True,\n include_sla=True,\n include_freshness=True,\n loaded_at_field=\"_viadot_downloaded_at_utc::timestamp\",\n freshness={\n \"warn_after\": \"{ count: 24, period: hour }\",\n \"error_after\": \"{ count: 48, period: hour }\",\n },\n table_pattern=\"%\",\n exclude=\"\",\n name=schema_name,\n table_names=None,\n case_sensitive_cols=True\n) %}\n {# The default table_pattern is adapted to the postgres database. Make sure it also matches the database you intend to use #}\n ,\n\n {% set sources_yaml = [] %}\n\n {% if table_names is none %}\n {% do sources_yaml.append(\"version: 2\") %}\n {% do sources_yaml.append(\"\") %}\n {% do sources_yaml.append(\"sources:\") %}\n {% do sources_yaml.append(\" - name: \" ~ name | lower) %}\n\n {% if database_name != target.database %}\n {% do sources_yaml.append(\" database: \" ~ database_name | lower) %}\n {% endif %}\n\n {% do sources_yaml.append(\" schema: \" ~ schema_name | lower) %}\n {% if include_descriptions %}\n {% do sources_yaml.append(' description: \"\"') %}\n {% endif %}\n {% do sources_yaml.append(\"\\n tables:\") %}\n\n {% set tables = get_tables_in_schema(schema_name, database_name, table_pattern, exclude) %}\n {% else %} {% set tables = table_names %}\n\n {% endif %}\n\n {% if table_names %} {% do sources_yaml.append(\"\") %} {% endif %}\n\n {% for table in tables %}\n {% do sources_yaml.append(\"\\n - name: \" ~ table | lower) %}\n {% if include_descriptions %}\n \n {% if include_table_profiling %}\n {# Note that the doc must already exist. You can generate it beforehand with dbt-profiler. 
#}\n {% do sources_yaml.append(' description: ' ~ \"'\" ~ '{{ doc(\"' ~ schema_name ~ \"_\" ~ table ~ '\") }}' ~ \"'\") %}\n {% else %}\n {% do sources_yaml.append(' description: \"\"') %}\n {% endif %}\n\n {% endif %}\n\n {% if include_freshness %}\n {% do sources_yaml.append(\" loaded_at_field: \" ~ loaded_at_field) %}\n {% do sources_yaml.append(\" freshness:\") %}\n {% do sources_yaml.append(\" warn_after: \" ~ freshness.get(\"warn_after\", \"\")) %}\n {% do sources_yaml.append(\n \" error_after: \" ~ freshness.get(\"error_after\", \"\")\n ) %}\n {% endif %}\n\n {% do sources_yaml.append(\" tags: []\") %}\n\n {% do sources_yaml.append(\" meta:\") %}\n {% do sources_yaml.append(\" owners:\") %}\n {% do sources_yaml.append(\" - type: Technical owner\") %}\n {% do sources_yaml.append(\" email: \" ~ technical_owner) %}\n {% do sources_yaml.append(\" - type: Business owner\") %}\n {% do sources_yaml.append(\" email: \" ~ business_owner) %}\n {% do sources_yaml.append(\" domains: \" ~ domains) %}\n {% do sources_yaml.append(\" true_source: \" ~ source_systems) %}\n\n {% if include_sla %} {% do sources_yaml.append(' SLA: \"24 hours\"') %} {% endif %}\n\n {% if generate_columns %}\n {% do sources_yaml.append(\" columns:\") %}\n\n {% set table_relation = api.Relation.create(\n database=database_name, schema=schema_name, identifier=table\n ) %}\n\n {% set columns = adapter.get_columns_in_relation(table_relation) %}\n {% for column in columns %}\n {% if case_sensitive_cols %}\n {% do sources_yaml.append(\" - name: \" ~ adapter.quote(column.name)) %}\n {% else %}\n {% do sources_yaml.append(\n \" - name: \" ~ adapter.quote(column.name) | lower\n ) %}\n {% endif %}\n {% do sources_yaml.append(\" quote: true\") %}\n {% if include_data_types %}\n {% do sources_yaml.append(\n \" data_type: \" ~ (column.data_type | upper)\n ) %}\n {% endif %}\n {% if include_descriptions %}\n {% do sources_yaml.append(' description: \"\"') %}\n {% endif %}\n {% do sources_yaml.append(\" # tests:\") %}\n {% do sources_yaml.append(\" # - unique\") %}\n {% do sources_yaml.append(\" # - not_null\") %}\n {% do sources_yaml.append(\" tags: []\") %}\n {% endfor %}\n {% endif %}\n\n {% endfor %}\n\n {% if execute %}\n\n {% set joined = sources_yaml | join(\"\\n\") %} {{ print(joined) }} {% do return(joined) %}\n\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.my_nesso_project.get_tables_in_schema"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8845384, + "supported_languages": null + }, + "macro.my_nesso_project.generate_schema_name": { + "name": "generate_schema_name", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/get_custom_schema.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/get_custom_schema.sql", + "unique_id": "macro.my_nesso_project.generate_schema_name", + "macro_sql": "{% macro generate_schema_name(custom_schema_name, node) -%}\n {{ generate_schema_name_for_env(custom_schema_name, node) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.generate_schema_name_for_env"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8848908, + "supported_languages": null + }, + "macro.my_nesso_project.get_table_columns": { + "name": "get_table_columns", + "resource_type": "macro", + 
"package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/get_table_columns.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/get_table_columns.sql", + "unique_id": "macro.my_nesso_project.get_table_columns", + "macro_sql": "{%- macro get_table_columns(schema_name, table_name, database_name=target.database) -%}\n\n {% set table_relation = api.Relation.create(\n schema=schema_name, identifier=table_name, database=database_name\n ) %}\n\n {% set columns = adapter.get_columns_in_relation(table_relation) %}\n\n\n {% set columns_dict = {} %}\n {% for column in columns %}\n {% set column_name = column.name %}\n {% set data_type = column.data_type | upper %}\n {% do columns_dict.update({column_name: data_type})%}\n {% endfor %}\n\n {% if execute %}\n\n {{ print(columns_dict) }} {% do return(columns_dict) %}\n\n {% endif %}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8862143, + "supported_languages": null + }, + "macro.my_nesso_project.get_source_pii_columns": { + "name": "get_source_pii_columns", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/get_source_pii_columns.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/get_source_pii_columns.sql", + "unique_id": "macro.my_nesso_project.get_source_pii_columns", + "macro_sql": "{% macro get_source_pii_columns(dbt_project, schema, table) %}\n\n {% if execute %}\n\n {% set meta_columns = [] %}\n {% set fqname = \"source\" ~ \".\" ~ dbt_project ~ \".\" ~ schema ~ \".\" ~ table %}\n {% set columns = graph.sources[fqname][\"columns\"] %}\n\n {% for column in columns %}\n {% if \"PII\" in graph.sources[fqname][\"columns\"][column][\"tags\"] %}\n {% do meta_columns.append(column) %}\n {% endif %}\n {% endfor %}\n\n {{ return(meta_columns) }}\n\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8876343, + "supported_languages": null + }, + "macro.my_nesso_project.generate_base_model": { + "name": "generate_base_model", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/generate_base_model.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/generate_base_model.sql", + "unique_id": "macro.my_nesso_project.generate_base_model", + "macro_sql": "{% macro generate_base_model(\n source_name, table_name, dbt_project, snakecase_columns=False, leading_commas=False\n) %}\n\n {%- set source_relation = source(source_name, table_name) -%}\n\n {%- set columns = adapter.get_columns_in_relation(source_relation) -%}\n {%- set column_names = columns | map(attribute=\"name\") -%}\n\n {%- set base_model_sql -%}\nwith _masked as (\n select {{ '\\n ' ~ hash_source_pii_columns(table=table_name, schema=source_name, dbt_project=dbt_project ) | trim }}\n from {{ \"{{ source(\" ~ '\"' ~ source_name ~ '\"' ~ \", \" ~ '\"' ~ table_name ~ '\"' ~ \") }}\" }}\n),\n\nrenamed as (\n select\n {%- if leading_commas -%}\n {%- for column in column_names %}\n {{\", \" if not loop.first}}\n {% if snakecase_columns %}\n {{ adapter.quote(column) ~ ' as ' ~ adapter.quote(snake_case(column)) }}\n {% else %}\n {{ adapter.quote(column) }}\n {% endif %}\n {%- endfor %}\n {% else %}\n 
{% for column in column_names %}\n {%- if snakecase_columns -%}\n {{ adapter.quote(column) ~ ' as ' ~ adapter.quote(snake_case(column)) }}\n {%- else -%}\n {{ adapter.quote(column) }}\n {%- endif -%}\n {{\",\" if not loop.last}}\n {% endfor %}\n {%- endif %}\n from _masked\n)\n\nselect * from renamed\n {%- endset -%}\n\n {% if execute %} {{ print(base_model_sql) }} {% do return(base_model_sql) %} {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.my_nesso_project.hash_source_pii_columns", + "macro.my_nesso_project.snake_case" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8906016, + "supported_languages": null + }, + "macro.my_nesso_project.hash_source_pii_columns": { + "name": "hash_source_pii_columns", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/hash_source_pii_columns.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/hash_source_pii_columns.sql", + "unique_id": "macro.my_nesso_project.hash_source_pii_columns", + "macro_sql": "{%- macro hash_source_pii_columns(dbt_project, schema, table=None) -%}\n\n {%- set pii_columns = get_source_pii_columns(\n dbt_project=dbt_project, schema=schema, table=table\n ) -%}\n\n {% for column in pii_columns %}\n {{ hash(column) | indent(4) }} as {{ adapter.quote(column) }},\n {{ \"\\n\" if not loop.last else \"\\n \" }}\n {%- endfor -%}\n {{ dbt_utils.star(from=source(schema, table), except=pii_columns) | indent(4) | trim }}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.my_nesso_project.get_source_pii_columns", + "macro.my_nesso_project.hash", + "macro.dbt_utils.star" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8916154, + "supported_languages": null + }, + "macro.my_nesso_project.generate_seed_schema_yaml": { + "name": "generate_seed_schema_yaml", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/generate_seed_yaml_boilerplate.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/generate_seed_yaml_boilerplate.sql", + "unique_id": "macro.my_nesso_project.generate_seed_schema_yaml", + "macro_sql": "{% macro generate_seed_schema_yaml() %}\n\n {% set yaml = [] %}\n {% do yaml.append(\"version: 2\") %}\n {% do yaml.append(\"\") %}\n {% do yaml.append(\"seeds: []\") %}\n\n {% if execute %}\n {% set joined = yaml | join(\"\\n\") %} {{ print(joined) }} {% do return(joined) %}\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.8944848, + "supported_languages": null + }, + "macro.my_nesso_project.generate_seed_yaml": { + "name": "generate_seed_yaml", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/generate_seed_yaml_boilerplate.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/generate_seed_yaml_boilerplate.sql", + "unique_id": "macro.my_nesso_project.generate_seed_yaml", + "macro_sql": "{% macro generate_seed_yaml(\n seed,\n database_name=target.database,\n schema_name=target.schema,\n generate_columns=True,\n include_tags=False,\n include_owners=True,\n technical_owner=\"\",\n 
business_owner=\"\",\n domains=[],\n source_systems=[],\n case_sensitive_cols=True\n) %}\n\n {% set yaml = [] %}\n\n {% do yaml.append(\" - name: \" ~ seed | lower) %}\n {% do yaml.append(' description: \"\"') %}\n\n {% if include_tags %} {% do yaml.append(\" tags: []\") %} {% endif %}\n\n {% if include_owners %}\n {% do yaml.append(\" meta:\") %}\n {% do yaml.append(\" owners:\") %}\n {% do yaml.append(\" - type: Technical owner\") %}\n {% do yaml.append(\" email: \" ~ technical_owner) %}\n {% do yaml.append(\" - type: Business owner\") %}\n {% do yaml.append(\" email: \" ~ business_owner) %}\n {% do yaml.append(\" domains: \" ~ domains) %}\n {% do yaml.append(\" true_source: \" ~ source_systems) %}\n {% endif %}\n\n {% if generate_columns %}\n {% do yaml.append(\" columns:\") %}\n\n {% set table_relation = api.Relation.create(\n database=database_name, schema=schema_name, identifier=seed\n ) %}\n {% set columns = adapter.get_columns_in_relation(table_relation) %}\n {% for column in columns %}\n {% if case_sensitive_cols %}\n {% do yaml.append(\" - name: \" ~ column.name) %}\n {% do yaml.append(\" quote: true\") %}\n {% else %} {% do yaml.append(\" - name: \" ~ column.name | lower) %}\n {% endif %}\n {% do yaml.append(' description: \"\"') %}\n {% do yaml.append(\" # tests:\") %}\n {% do yaml.append(\" # - unique\") %}\n {% do yaml.append(\" # - not_null\") %}\n {% do yaml.append(\" # - accepted_values:\") %}\n {% do yaml.append(' # values: [\"value1\", \"value2\"]') %}\n {% endfor %}\n\n {% endif %}\n\n {% if execute %}\n {% set joined = yaml | join(\"\\n\") %} {{ print(joined) }} {% do return(joined) %}\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.898067, + "supported_languages": null + }, + "macro.my_nesso_project.redshift__list_relations_without_caching": { + "name": "redshift__list_relations_without_caching", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/redshift_external_tables_fix.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/redshift_external_tables_fix.sql", + "unique_id": "macro.my_nesso_project.redshift__list_relations_without_caching", + "macro_sql": "{% macro redshift__list_relations_without_caching(schema_relation) %}\n\n {% call statement('list_relations_without_caching', fetch_result=True) -%}\n select\n table_catalog as database,\n table_name as name,\n table_schema as schema,\n 'table' as type\n from information_schema.tables\n where table_schema ilike '{{ schema_relation.schema }}'\n and table_type = 'BASE TABLE'\n union all\n select\n table_catalog as database,\n table_name as name,\n table_schema as schema,\n case\n when view_definition ilike '%create materialized view%'\n then 'materialized_view'\n else 'view'\n end as type\n from information_schema.views\n where table_schema ilike '{{ schema_relation.schema }}'\n union all\n select \n redshift_database_name as database,\n tablename as name,\n schemaname as schema,\n 'table' as type\n from svv_external_tables\n where schemaname ilike '{{ schema_relation.schema }}'\n {% endcall %}\n\n {{ return(load_result('list_relations_without_caching').table) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + 
"created_at": 1711458071.89888, + "supported_languages": null + }, + "macro.my_nesso_project.snake_case": { + "name": "snake_case", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "unique_id": "macro.my_nesso_project.snake_case", + "macro_sql": "{%- macro snake_case(s) -%} {{ s | replace(\" \", \"_\") | replace(\"-\", \"_\") | lower }} {%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9019034, + "supported_languages": null + }, + "macro.my_nesso_project.get_model_dependencies": { + "name": "get_model_dependencies", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "unique_id": "macro.my_nesso_project.get_model_dependencies", + "macro_sql": "{% macro get_model_dependencies(model_name) %}\n {# Set to True to enable logging #}\n {% set info=False %}\n\n {{ \n log(\n \"get_model_dependencies | Getting upstream dependencies for model '\"\n ~ model_name\n ~ \"'...\",\n info=info\n )\n }}\n\n {% set upstream_fqns = [] %}\n\n {{ log(\"get_model_dependencies | Checking upstream models...\", info=info) }}\n {% for node in graph.nodes.values() | selectattr(\"name\", \"equalto\", model_name) %}\n {% if node.depends_on.nodes and not \"source.\" in node.depends_on.nodes[0] %}\n {# The node depends on another model. #}\n {{ \n log(\n \"get_model_dependencies | Got the following dependencies: \"\n ~ node.depends_on.nodes\n ~ \".\",\n info=info\n )\n }}\n {{ return({\"type\": \"model\", \"nodes\": node.depends_on.nodes}) }}\n {% endif %}\n {% endfor %}\n\n {{ log(\"get_model_dependencies | Checking upstream source...\", info=info) }}\n {% for node in graph.sources.values() | selectattr(\"name\", \"equalto\", model_name) %}\n {{ \n log(\n \"get_model_dependencies | Got the following dependencies: \" ~ node, info=info\n )\n }}\n {{ return({\"type\": \"source\", \"node\": node.unique_id}) }}\n {% endfor %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9037673, + "supported_languages": null + }, + "macro.my_nesso_project.get_source_or_model_column_metadata": { + "name": "get_source_or_model_column_metadata", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "unique_id": "macro.my_nesso_project.get_source_or_model_column_metadata", + "macro_sql": "{% macro get_source_or_model_column_metadata(model_name, model_type=\"model\") %}\n {# \nGet column metadata (description and tags) for a model or source.\n\nReturns: Dict[str, Dict[str, Any]]\n\nExample:\n>>> dbt run-operation get_source_or_model_column_metadata --args '{\"model_name\": \"c4c_contact\", \"model_type\": \"model\"}'\n>>> {\"id\": {\"description\": \"A\", \"tags\": []}}\n#}\n {% if model_type == \"model\" %} {% set nodes = graph.nodes.values() %}\n {% else %} {% set nodes = graph.sources.values() %}\n {% endif %}\n\n {% set 
columns_metadata_dict = {} %}\n {% for node in nodes | selectattr(\"name\", \"equalto\", model_name) %}\n {% for col_name, col_values in node.columns.items() %}\n {% do columns_metadata_dict.update(\n {\n col_name: {\n \"description\": col_values.description,\n \"tags\": col_values.tags,\n }\n }\n ) %}\n {% endfor %}\n {% endfor %}\n\n {{ return(columns_metadata_dict) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9050193, + "supported_languages": null + }, + "macro.my_nesso_project.get_parent_source_or_model_column_metadata": { + "name": "get_parent_source_or_model_column_metadata", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "unique_id": "macro.my_nesso_project.get_parent_source_or_model_column_metadata", + "macro_sql": "{% macro get_parent_source_or_model_column_metadata(model_name) %}\n {# \nGet column metadata (description and tags) for the model's or source's\nparent source or model.\n\nThis is useful for automatically populating YAML files of downstream models\nwith the information already provided in upstream (for example, if a view\nuses a field from a source and this field's description is already available \nin the source's YAML file).\n\nNote that if the same column name exists in multiple upstream models, \nthe description will be overwritten at each loop and the final one\nwill be taken from the model that happens to be the last in the loop. \n\nReturns: Dict[str, Dict[str, Any]]\n\nExample:\n>>> dbt run-operation get_parent_source_or_model_column_metadata --args '{\"model_name\": \"c4c_contact\"}'\n>>> {\"id\": {\"description\": \"B\", \"tags\": []}}\n#}\n {# Set to True to enable logging to console #}\n {% set info = False %}\n\n {{\n log(\n \"get_parent_source_or_model_column_metadata | Getting column-level metadata for \"\n ~ model_type\n ~ \" '\"\n ~ model_name\n ~ \"'...\",\n info=info\n )\n }}\n\n {% if execute %}\n {% set dependencies = get_model_dependencies(model_name) %}\n {% set model_type = dependencies[\"type\"] %}\n\n {# Note we immediately return `column_metadata`, as outside the if/else, it's magically set to None. 
#}\n {% if model_type == \"model\" %}\n {% for full_model in dependencies[\"nodes\"] %}\n {% set upstream_model_name = full_model.split(\".\")[-1] %}\n {% set column_metadata = get_source_or_model_column_metadata(\n model_name=upstream_model_name, model_type=model_type\n ) %}\n {{\n log(\n \"get_parent_source_or_model_column_metadata() | Got model column metadata:\\n\\n\"\n ~ column_metadata\n ~ \"\\n\",\n info=info\n )\n }}\n {{ return(column_metadata) }}\n {% endfor %}\n {% endif %}\n\n {% if model_type == \"source\" %}\n {% set upstream_model_name = dependencies[\"node\"].split(\".\")[-1] %}\n {% set column_metadata = get_source_or_model_column_metadata(\n model_name=upstream_model_name, model_type=model_type\n ) %}\n {{\n log(\n \"get_parent_source_or_model_column_metadata() | Got source column metadata:\\n\\n\"\n ~ column_metadata\n ~ \"\\n\",\n info=info\n )\n }}\n {{ return(column_metadata) }}\n {% endif %}\n\n {% endif %}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.my_nesso_project.get_model_dependencies", + "macro.my_nesso_project.get_source_or_model_column_metadata" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9069552, + "supported_languages": null + }, + "macro.my_nesso_project.get_source_or_model_metadata": { + "name": "get_source_or_model_metadata", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "unique_id": "macro.my_nesso_project.get_source_or_model_metadata", + "macro_sql": "{% macro get_source_or_model_metadata(model_name, model_type=\"model\") %}\n {# \nGet table metadata (description, tags, and meta) for a model or source.\n\nNote that if there are multiple upstream models, the metadata will\nbe overwritten at each loop and the final one will be taken from the model \nthat happens to be the last in the loop. 
\n\nReturns: Dict[str, Union[str, List[str], Dict[str, Any]]]\n\nExample:\n>>> dbt run-operation get_source_or_model_metadata --args '{\"model_name\": \"c4c_contact\", \"model_type\": \"model\"}'\n>>> {\"description\": \"A\", \"tags\": [], \"meta\": {\"owner\": js@example.com}}\n#}\n {# Set to True to enable debugging #}\n {% set info = False %}\n\n {{ \n log(\n \"get_source_or_model_metadata() | Getting model-level metadata for \" \n ~ model_type \n ~ \" '\" \n ~ model_name \n ~ \"'...\",\n info=info\n )\n }}\n\n {% if model_type == \"model\" %} {% set nodes = graph.nodes.values() %}\n {% else %} {% set nodes = graph.sources.values() %}\n {% endif %}\n\n {% set table_metadata_dict = {} %}\n {% for node in nodes | selectattr(\"name\", \"equalto\", model_name) %}\n {{ log(node, info=info) }}\n {% do table_metadata_dict.update(\n {\"description\": node.description, \"tags\": node.tags, \"meta\": node.meta}\n ) %}\n {% endfor %}\n\n {{\n log(\n \"get_source_or_model_metadata() | Successfully retrieved model-level metadata for \"\n ~ model_type\n ~ \" '\"\n ~ model_name\n ~ \"':\\n\"\n ~ table_metadata_dict,\n info=info\n )\n }}\n\n {{ return(table_metadata_dict) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.908675, + "supported_languages": null + }, + "macro.my_nesso_project.get_parent_source_or_model_metadata": { + "name": "get_parent_source_or_model_metadata", + "resource_type": "macro", + "package_name": "my_nesso_project", + "path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "original_file_path": "dbt_packages/nesso_macros/macros/codegen_helpers.sql", + "unique_id": "macro.my_nesso_project.get_parent_source_or_model_metadata", + "macro_sql": "{% macro get_parent_source_or_model_metadata(model_name) %}\n{#\nGet table metadata (description, tags, and meta) for the model's parent\nsource(s) and/or model(s).\n\nThis is useful for automatically populating YAML files of downstream models\nwith the information already provided in upstream (eg. when defining\nbase views).\n\nReturns: Dict[str, Union[str, List[str], Dict[str, Any]]]\n\nExample:\n>>> dbt run-operation get_parent_source_or_model_metadata --args '{\"model_name\": \"c4c_contact\"}'\n>>> {\"description\": \"B\", \"tags\": [], \"meta\": {\"owner\": js@example.com}}\n#}\n {% if execute %}\n\n {# Set to True to enable debugging. #}\n {% set info=False %}\n\n {{ log(\"get_parent_source_or_model_metadata | Getting upstream metadata...\", info=info) }}\n\n {% set dependencies = get_model_dependencies(model_name) %}\n {{\n log(\n \"get_parent_source_or_model_metadata() | Got the following dependencies: \"\n ~ dependencies,\n info=info\n )\n }}\n {% set model_type = dependencies[\"type\"] %}\n \n {# Note we immediately return `model_metadata`, as outside the if/else, it's magically set to None. 
#}\n {% if model_type == \"model\" %}\n {% for full_model in dependencies[\"nodes\"] %}\n {% set model_name = full_model.split(\".\")[-1] %}\n {% set model_metadata = get_source_or_model_metadata(\n model_name, model_type=model_type\n ) %}\n {% do return(model_metadata) %}\n {% endfor %}\n {% elif model_type == \"source\" %}\n {% set model_name = dependencies[\"node\"].split(\".\")[-1] %}\n {% set model_metadata = get_source_or_model_metadata(\n model_name, model_type=model_type\n ) %}\n {{\n log(\n \"get_parent_source_or_model_metadata| Got the following upstream sources:\\n\"\n ~ model_metadata,\n info=info\n )\n }}\n {% do return(model_metadata) %}\n {% else %} \n {{\n log(\n \"get_parent_source_or_model_metadata| Incorrect model type (\"\n ~ model_type\n ~ \").\",\n info=info\n )\n }}\n {% set model_metadata = {} %}\n {% do return(model_metadata) %}\n {% endif %}\n\n {{ log(\"get_parent_source_or_model_metadata | Finishing...\", info=info) }}\n {{ log(\"\", info=info) }}\n\n {% endif %}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.my_nesso_project.get_model_dependencies", + "macro.my_nesso_project.get_source_or_model_metadata" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.910998, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__alter_relation_add_remove_columns": { + "name": "duckdb__alter_relation_add_remove_columns", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/columns.sql", + "original_file_path": "macros/columns.sql", + "unique_id": "macro.dbt_duckdb.duckdb__alter_relation_add_remove_columns", + "macro_sql": "{% macro duckdb__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}\n\n {% if add_columns %}\n {% for column in add_columns %}\n {% set sql -%}\n alter {{ relation.type }} {{ relation }} add column\n {{ column.name }} {{ column.data_type }}\n {%- endset -%}\n {% do run_query(sql) %}\n {% endfor %}\n {% endif %}\n\n {% if remove_columns %}\n {% for column in remove_columns %}\n {% set sql -%}\n alter {{ relation.type }} {{ relation }} drop column\n {{ column.name }}\n {%- endset -%}\n {% do run_query(sql) %}\n {% endfor %}\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9124444, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__snapshot_merge_sql": { + "name": "duckdb__snapshot_merge_sql", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/snapshot_helper.sql", + "original_file_path": "macros/snapshot_helper.sql", + "unique_id": "macro.dbt_duckdb.duckdb__snapshot_merge_sql", + "macro_sql": "{% macro duckdb__snapshot_merge_sql(target, source, insert_cols) -%}\n {%- set insert_cols_csv = insert_cols | join(', ') -%}\n\n update {{ target }} as DBT_INTERNAL_TARGET\n set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to\n from {{ source }} as DBT_INTERNAL_SOURCE\n where DBT_INTERNAL_SOURCE.dbt_scd_id::text = DBT_INTERNAL_TARGET.dbt_scd_id::text\n and DBT_INTERNAL_SOURCE.dbt_change_type::text in ('update'::text, 'delete'::text)\n and DBT_INTERNAL_TARGET.dbt_valid_to is null;\n\n insert into {{ target }} ({{ insert_cols_csv }})\n select {% for column in insert_cols -%}\n DBT_INTERNAL_SOURCE.{{ column }} {%- if not loop.last %}, {%- endif %}\n {%- 
endfor %}\n from {{ source }} as DBT_INTERNAL_SOURCE\n where DBT_INTERNAL_SOURCE.dbt_change_type::text = 'insert'::text;\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9136496, + "supported_languages": null + }, + "macro.dbt_duckdb.build_snapshot_staging_table": { + "name": "build_snapshot_staging_table", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/snapshot_helper.sql", + "original_file_path": "macros/snapshot_helper.sql", + "unique_id": "macro.dbt_duckdb.build_snapshot_staging_table", + "macro_sql": "{% macro build_snapshot_staging_table(strategy, sql, target_relation) %}\n {% set temp_relation = make_temp_relation(target_relation) %}\n\n {% set select = snapshot_staging_table(strategy, sql, target_relation) %}\n\n {% call statement('build_snapshot_staging_relation') %}\n {{ create_table_as(False, temp_relation, select) }}\n {% endcall %}\n\n {% do return(temp_relation) %}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.make_temp_relation", + "macro.dbt.snapshot_staging_table", + "macro.dbt.statement", + "macro.dbt.create_table_as" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9142869, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__post_snapshot": { + "name": "duckdb__post_snapshot", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/snapshot_helper.sql", + "original_file_path": "macros/snapshot_helper.sql", + "unique_id": "macro.dbt_duckdb.duckdb__post_snapshot", + "macro_sql": "{% macro duckdb__post_snapshot(staging_relation) %}\n {% do return(drop_relation(staging_relation)) %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.drop_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.914514, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__create_schema": { + "name": "duckdb__create_schema", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__create_schema", + "macro_sql": "{% macro duckdb__create_schema(relation) -%}\n {%- call statement('create_schema') -%}\n {% set sql %}\n select type from duckdb_databases()\n where database_name='{{ relation.database }}'\n and type='sqlite'\n {% endset %}\n {% set results = run_query(sql) %}\n {% if results|length == 0 %}\n create schema if not exists {{ relation.without_identifier() }}\n {% else %}\n {% if relation.schema!='main' %}\n {{ exceptions.raise_compiler_error(\n \"Schema must be 'main' when writing to sqlite \"\n ~ \"instead got \" ~ relation.schema\n )}}\n {% endif %}\n {% endif %}\n {%- endcall -%}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement", "macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9227583, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__drop_schema": { + "name": "duckdb__drop_schema", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": 
"macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__drop_schema", + "macro_sql": "{% macro duckdb__drop_schema(relation) -%}\n {%- call statement('drop_schema') -%}\n drop schema if exists {{ relation.without_identifier() }} cascade\n {%- endcall -%}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9230433, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__list_schemas": { + "name": "duckdb__list_schemas", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__list_schemas", + "macro_sql": "{% macro duckdb__list_schemas(database) -%}\n {% set sql %}\n select schema_name\n from system.information_schema.schemata\n {% if database is not none %}\n where catalog_name = '{{ database }}'\n {% endif %}\n {% endset %}\n {{ return(run_query(sql)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9234731, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__check_schema_exists": { + "name": "duckdb__check_schema_exists", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__check_schema_exists", + "macro_sql": "{% macro duckdb__check_schema_exists(information_schema, schema) -%}\n {% set sql -%}\n select count(*)\n from system.information_schema.schemata\n where schema_name = '{{ schema }}'\n and catalog_name = '{{ information_schema.database }}'\n {%- endset %}\n {{ return(run_query(sql)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.923937, + "supported_languages": null + }, + "macro.dbt_duckdb.get_column_names": { + "name": "get_column_names", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.get_column_names", + "macro_sql": "{% macro get_column_names() %}\n {# loop through user_provided_columns to get column names #}\n {%- set user_provided_columns = model['columns'] -%}\n (\n {% for i in user_provided_columns %}\n {% set col = user_provided_columns[i] %}\n {{ col['name'] }} {{ \",\" if not loop.last }}\n {% endfor %}\n )\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.924471, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__create_table_as": { + "name": "duckdb__create_table_as", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__create_table_as", + "macro_sql": "{% macro duckdb__create_table_as(temporary, relation, compiled_code, language='sql') -%}\n {%- if language == 'sql' -%}\n {% set contract_config = config.get('contract') %}\n 
{% if contract_config.enforced %}\n {{ get_assert_columns_equivalent(compiled_code) }}\n {% endif %}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n\n create {% if temporary: -%}temporary{%- endif %} table\n {{ relation.include(database=(not temporary), schema=(not temporary)) }}\n {% if contract_config.enforced and not temporary %}\n {#-- DuckDB doesnt support constraints on temp tables --#}\n {{ get_table_columns_and_constraints() }} ;\n insert into {{ relation }} {{ get_column_names() }} (\n {{ get_select_subquery(compiled_code) }}\n );\n {% else %}\n as (\n {{ compiled_code }}\n );\n {% endif %}\n {%- elif language == 'python' -%}\n {{ py_write_table(temporary=temporary, relation=relation, compiled_code=compiled_code) }}\n {%- else -%}\n {% do exceptions.raise_compiler_error(\"duckdb__create_table_as macro didn't get supported language, it got %s\" % language) %}\n {%- endif -%}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.get_assert_columns_equivalent", + "macro.dbt.get_table_columns_and_constraints", + "macro.dbt_duckdb.get_column_names", + "macro.dbt.get_select_subquery", + "macro.dbt_duckdb.py_write_table" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9260561, + "supported_languages": null + }, + "macro.dbt_duckdb.py_write_table": { + "name": "py_write_table", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.py_write_table", + "macro_sql": "{% macro py_write_table(temporary, relation, compiled_code) -%}\n{{ compiled_code }}\n\ndef materialize(df, con):\n try:\n import pyarrow\n pyarrow_available = True\n except ImportError:\n pyarrow_available = False\n finally:\n if pyarrow_available and isinstance(df, pyarrow.Table):\n # https://github.com/duckdb/duckdb/issues/6584\n import pyarrow.dataset\n con.execute('create table {{ relation }} as select * from df')\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9263368, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__create_view_as": { + "name": "duckdb__create_view_as", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__create_view_as", + "macro_sql": "{% macro duckdb__create_view_as(relation, sql) -%}\n {% set contract_config = config.get('contract') %}\n {% if contract_config.enforced %}\n {{ get_assert_columns_equivalent(sql) }}\n {%- endif %}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n create view {{ relation }} as (\n {{ sql }}\n );\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_assert_columns_equivalent"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9269922, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__get_columns_in_relation": { + "name": "duckdb__get_columns_in_relation", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": 
"macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__get_columns_in_relation", + "macro_sql": "{% macro duckdb__get_columns_in_relation(relation) -%}\n {% call statement('get_columns_in_relation', fetch_result=True) %}\n select\n column_name,\n data_type,\n character_maximum_length,\n numeric_precision,\n numeric_scale\n\n from system.information_schema.columns\n where table_name = '{{ relation.identifier }}'\n {% if relation.schema %}\n and table_schema = '{{ relation.schema }}'\n {% endif %}\n {% if relation.database %}\n and table_catalog = '{{ relation.database }}'\n {% endif %}\n order by ordinal_position\n\n {% endcall %}\n {% set table = load_result('get_columns_in_relation').table %}\n {{ return(sql_convert_columns_in_relation(table)) }}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.statement", + "macro.dbt.sql_convert_columns_in_relation" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9277601, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__list_relations_without_caching": { + "name": "duckdb__list_relations_without_caching", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__list_relations_without_caching", + "macro_sql": "{% macro duckdb__list_relations_without_caching(schema_relation) %}\n {% call statement('list_relations_without_caching', fetch_result=True) -%}\n select\n '{{ schema_relation.database }}' as database,\n table_name as name,\n table_schema as schema,\n CASE table_type\n WHEN 'BASE TABLE' THEN 'table'\n WHEN 'VIEW' THEN 'view'\n WHEN 'LOCAL TEMPORARY' THEN 'table'\n END as type\n from system.information_schema.tables\n where table_schema = '{{ schema_relation.schema }}'\n and table_catalog = '{{ schema_relation.database }}'\n {% endcall %}\n {{ return(load_result('list_relations_without_caching').table) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9282615, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__drop_relation": { + "name": "duckdb__drop_relation", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__drop_relation", + "macro_sql": "{% macro duckdb__drop_relation(relation) -%}\n {% call statement('drop_relation', auto_begin=False) -%}\n drop {{ relation.type }} if exists {{ relation }} cascade\n {%- endcall %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9286013, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__rename_relation": { + "name": "duckdb__rename_relation", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__rename_relation", + "macro_sql": "{% macro duckdb__rename_relation(from_relation, to_relation) -%}\n {% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}\n {% call 
statement('rename_relation') -%}\n alter {{ to_relation.type }} {{ from_relation }} rename to {{ target_name }}\n {%- endcall %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9290712, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__make_temp_relation": { + "name": "duckdb__make_temp_relation", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__make_temp_relation", + "macro_sql": "{% macro duckdb__make_temp_relation(base_relation, suffix) %}\n {% set tmp_identifier = base_relation.identifier ~ suffix ~ py_current_timestring() %}\n {% do return(base_relation.incorporate(\n path={\n \"identifier\": tmp_identifier,\n \"schema\": none,\n \"database\": none\n })) -%}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.py_current_timestring"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9296649, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__current_timestamp": { + "name": "duckdb__current_timestamp", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__current_timestamp", + "macro_sql": "{% macro duckdb__current_timestamp() -%}\n now()\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.929784, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__snapshot_string_as_time": { + "name": "duckdb__snapshot_string_as_time", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__snapshot_string_as_time", + "macro_sql": "{% macro duckdb__snapshot_string_as_time(timestamp) -%}\n {%- set result = \"'\" ~ timestamp ~ \"'::timestamp\" -%}\n {{ return(result) }}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9300454, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__snapshot_get_time": { + "name": "duckdb__snapshot_get_time", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.duckdb__snapshot_get_time", + "macro_sql": "{% macro duckdb__snapshot_get_time() -%}\n {{ current_timestamp() }}::timestamp\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.current_timestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9301984, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__get_incremental_default_sql": { + "name": "duckdb__get_incremental_default_sql", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + 
"unique_id": "macro.dbt_duckdb.duckdb__get_incremental_default_sql", + "macro_sql": "{% macro duckdb__get_incremental_default_sql(arg_dict) %}\n {% do return(get_incremental_delete_insert_sql(arg_dict)) %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_incremental_delete_insert_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9304547, + "supported_languages": null + }, + "macro.dbt_duckdb.location_exists": { + "name": "location_exists", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.location_exists", + "macro_sql": "{% macro location_exists(location) -%}\n {% do return(adapter.location_exists(location)) %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.930708, + "supported_languages": null + }, + "macro.dbt_duckdb.write_to_file": { + "name": "write_to_file", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.write_to_file", + "macro_sql": "{% macro write_to_file(relation, location, options) -%}\n {% call statement('write_to_file') -%}\n copy {{ relation }} to '{{ location }}' ({{ options }})\n {%- endcall %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9310374, + "supported_languages": null + }, + "macro.dbt_duckdb.store_relation": { + "name": "store_relation", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.store_relation", + "macro_sql": "{% macro store_relation(plugin, relation, location, format, config) -%}\n {%- set column_list = adapter.get_columns_in_relation(relation) -%}\n {% do adapter.store_relation(plugin, relation, column_list, location, format, config) %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9314718, + "supported_languages": null + }, + "macro.dbt_duckdb.render_write_options": { + "name": "render_write_options", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/adapters.sql", + "original_file_path": "macros/adapters.sql", + "unique_id": "macro.dbt_duckdb.render_write_options", + "macro_sql": "{% macro render_write_options(config) -%}\n {% set options = config.get('options', {}) %}\n {% if options is not mapping %}\n {% do exceptions.raise_compiler_error(\"The options argument must be a dictionary\") %}\n {% endif %}\n\n {% for k in options %}\n {% set _ = options.update({k: render(options[k])}) %}\n {% endfor %}\n\n {# legacy top-level write options #}\n {% if config.get('format') %}\n {% set _ = options.update({'format': render(config.get('format'))}) %}\n {% endif %}\n {% if config.get('delimiter') %}\n {% set _ = options.update({'delimiter': render(config.get('delimiter'))}) %}\n {% endif %}\n\n {% do return(options) %}\n{%- 
endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.93291, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__get_delete_insert_merge_sql": { + "name": "duckdb__get_delete_insert_merge_sql", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/incremental_helper.sql", + "original_file_path": "macros/incremental_helper.sql", + "unique_id": "macro.dbt_duckdb.duckdb__get_delete_insert_merge_sql", + "macro_sql": "{% macro duckdb__get_delete_insert_merge_sql(target, source, unique_key, dest_columns, incremental_predicates) -%}\n\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n\n {% if unique_key %}\n {% if unique_key is sequence and unique_key is not string %}\n delete from {{target }} as DBT_INCREMENTAL_TARGET\n using {{ source }}\n where (\n {% for key in unique_key %}\n {{ source }}.{{ key }} = DBT_INCREMENTAL_TARGET.{{ key }}\n {{ \"and \" if not loop.last}}\n {% endfor %}\n {% if incremental_predicates %}\n {% for predicate in incremental_predicates %}\n and {{ predicate }}\n {% endfor %}\n {% endif %}\n );\n {% else %}\n delete from {{ target }}\n where (\n {{ unique_key }}) in (\n select ({{ unique_key }})\n from {{ source }}\n )\n {%- if incremental_predicates %}\n {% for predicate in incremental_predicates %}\n and {{ predicate }}\n {% endfor %}\n {%- endif -%};\n\n {% endif %}\n {% endif %}\n\n insert into {{ target }} ({{ dest_cols_csv }})\n (\n select {{ dest_cols_csv }}\n from {{ source }}\n )\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_quoted_csv"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9354093, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__get_catalog": { + "name": "duckdb__get_catalog", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/catalog.sql", + "original_file_path": "macros/catalog.sql", + "unique_id": "macro.dbt_duckdb.duckdb__get_catalog", + "macro_sql": "{% macro duckdb__get_catalog(information_schema, schemas) -%}\n {%- call statement('catalog', fetch_result=True) -%}\n select\n '{{ database }}' as table_database,\n t.table_schema,\n t.table_name,\n t.table_type,\n '' as table_comment,\n c.column_name,\n c.ordinal_position as column_index,\n c.data_type column_type,\n '' as column_comment,\n '' as table_owner\n FROM information_schema.tables t JOIN information_schema.columns c ON t.table_schema = c.table_schema AND t.table_name = c.table_name\n WHERE (\n {%- for schema in schemas -%}\n upper(t.table_schema) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}\n {%- endfor -%}\n )\n AND t.table_type IN ('BASE TABLE', 'VIEW')\n ORDER BY\n t.table_schema,\n t.table_name,\n c.ordinal_position\n {%- endcall -%}\n {{ return(load_result('catalog').table) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9363058, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__get_binding_char": { + "name": "duckdb__get_binding_char", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/seed.sql", + "original_file_path": "macros/seed.sql", + 
"unique_id": "macro.dbt_duckdb.duckdb__get_binding_char", + "macro_sql": "{% macro duckdb__get_binding_char() %}\n {{ return(adapter.get_binding_char()) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.938082, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__get_batch_size": { + "name": "duckdb__get_batch_size", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/seed.sql", + "original_file_path": "macros/seed.sql", + "unique_id": "macro.dbt_duckdb.duckdb__get_batch_size", + "macro_sql": "{% macro duckdb__get_batch_size() %}\n {{ return(10000) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.938265, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__load_csv_rows": { + "name": "duckdb__load_csv_rows", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/seed.sql", + "original_file_path": "macros/seed.sql", + "unique_id": "macro.dbt_duckdb.duckdb__load_csv_rows", + "macro_sql": "{% macro duckdb__load_csv_rows(model, agate_table) %}\n {% if config.get('fast', true) %}\n {% set seed_file_path = adapter.get_seed_file_path(model) %}\n {% set delimiter = config.get('delimiter', ',') %}\n {% set sql %}\n COPY {{ this.render() }} FROM '{{ seed_file_path }}' (FORMAT CSV, HEADER TRUE, DELIMITER '{{ delimiter }}')\n {% endset %}\n {% do adapter.add_query(sql, abridge_sql_log=True) %}\n {{ return(sql) }}\n {% endif %}\n\n {% set batch_size = get_batch_size() %}\n {% set agate_table = adapter.convert_datetimes_to_strs(agate_table) %}\n {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %}\n {% set bindings = [] %}\n\n {% set statements = [] %}\n\n {% for chunk in agate_table.rows | batch(batch_size) %}\n {% set bindings = [] %}\n\n {% for row in chunk %}\n {% do bindings.extend(row) %}\n {% endfor %}\n\n {% set sql %}\n insert into {{ this.render() }} ({{ cols_sql }}) values\n {% for row in chunk -%}\n ({%- for column in agate_table.column_names -%}\n {{ get_binding_char() }}\n {%- if not loop.last%},{%- endif %}\n {%- endfor -%})\n {%- if not loop.last%},{%- endif %}\n {%- endfor %}\n {% endset %}\n\n {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %}\n\n {% if loop.index0 == 0 %}\n {% do statements.append(sql) %}\n {% endif %}\n {% endfor %}\n\n {# Return SQL so we can render it out into the compiled files #}\n {{ return(statements[0]) }}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.get_batch_size", + "macro.dbt.get_seed_column_quoted_csv", + "macro.dbt.get_binding_char" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9410715, + "supported_languages": null + }, + "macro.dbt_duckdb.materialization_external_duckdb": { + "name": "materialization_external_duckdb", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/materializations/external.sql", + "original_file_path": "macros/materializations/external.sql", + "unique_id": "macro.dbt_duckdb.materialization_external_duckdb", + "macro_sql": "{% materialization external, adapter=\"duckdb\", supported_languages=['sql', 'python'] %}\n\n {%- set 
location = render(config.get('location', default=external_location(this, config))) -%})\n {%- set rendered_options = render_write_options(config) -%}\n {%- set format = config.get('format', 'parquet') -%}\n {%- set write_options = adapter.external_write_options(location, rendered_options) -%}\n {%- set read_location = adapter.external_read_location(location, rendered_options) -%}\n\n -- set language - python or sql\n {%- set language = model['language'] -%}\n\n {%- set target_relation = this.incorporate(type='view') %}\n\n -- Continue as normal materialization\n {%- set existing_relation = load_cached_relation(this) -%}\n {%- set temp_relation = make_intermediate_relation(this.incorporate(type='table'), suffix='__dbt_tmp') -%}\n {%- set intermediate_relation = make_intermediate_relation(target_relation, suffix='__dbt_int') -%}\n -- the intermediate_relation should not already exist in the database; get_relation\n -- will return None in that case. Otherwise, we get a relation that we can drop\n -- later, before we try to use this name for the current operation\n {%- set preexisting_temp_relation = load_cached_relation(temp_relation) -%}\n {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%}\n /*\n See ../view/view.sql for more information about this relation.\n */\n {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%}\n {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}\n -- as above, the backup_relation should not already exist\n {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%}\n -- grab current tables grants config for comparision later on\n {% set grant_config = config.get('grants') %}\n\n -- drop the temp relations if they exist already in the database\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n {{ drop_relation_if_exists(preexisting_temp_relation) }}\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% call statement('create_table', language=language) -%}\n {{- create_table_as(False, temp_relation, compiled_code, language) }}\n {%- endcall %}\n\n -- write an temp relation into file\n {{ write_to_file(temp_relation, location, write_options) }}\n -- create a view on top of the location\n {% call statement('main', language='sql') -%}\n create or replace view {{ intermediate_relation }} as (\n select * from '{{ read_location }}'\n );\n {%- endcall %}\n\n -- cleanup\n {% if existing_relation is not none %}\n {{ adapter.rename_relation(existing_relation, backup_relation) }}\n {% endif %}\n\n {{ adapter.rename_relation(intermediate_relation, target_relation) }}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n -- `COMMIT` happens here\n {{ adapter.commit() }}\n\n -- finally, drop the existing/backup relation after the commit\n {{ drop_relation_if_exists(backup_relation) }}\n {{ drop_relation_if_exists(temp_relation) }}\n\n -- register table into glue\n {%- set plugin_name = config.get('plugin') -%}\n {%- set glue_register = config.get('glue_register', default=false) -%}\n {%- set partition_columns = 
config.get('partition_columns', []) -%}\n {% if plugin_name is not none or glue_register is true %}\n {% if glue_register %}\n {# legacy hack to set the glue database name, deprecate this #}\n {%- set plugin_name = 'glue|' ~ config.get('glue_database', 'default') -%}\n {% endif %}\n {% do store_relation(plugin_name, target_relation, location, format, config) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmaterialization %}", + "depends_on": { + "macros": [ + "macro.dbt_duckdb.external_location", + "macro.dbt_duckdb.render_write_options", + "macro.dbt.load_cached_relation", + "macro.dbt.make_intermediate_relation", + "macro.dbt.make_backup_relation", + "macro.dbt.drop_relation_if_exists", + "macro.dbt.run_hooks", + "macro.dbt.statement", + "macro.dbt.create_table_as", + "macro.dbt_duckdb.write_to_file", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants", + "macro.dbt.persist_docs", + "macro.dbt_duckdb.store_relation" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9479668, + "supported_languages": ["sql", "python"] + }, + "macro.dbt_duckdb.materialization_incremental_duckdb": { + "name": "materialization_incremental_duckdb", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/materializations/incremental.sql", + "original_file_path": "macros/materializations/incremental.sql", + "unique_id": "macro.dbt_duckdb.materialization_incremental_duckdb", + "macro_sql": "{% materialization incremental, adapter=\"duckdb\", supported_languages=['sql', 'python'] -%}\n\n {%- set language = model['language'] -%}\n -- only create temp tables if using local duckdb, as it is not currently supported for remote databases\n {%- set temporary = not adapter.is_motherduck() -%}\n\n -- relations\n {%- set existing_relation = load_cached_relation(this) -%}\n {%- set target_relation = this.incorporate(type='table') -%}\n {%- set temp_relation = make_temp_relation(target_relation)-%}\n {%- set intermediate_relation = make_intermediate_relation(target_relation)-%}\n {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%}\n {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}\n\n -- configs\n {%- set unique_key = config.get('unique_key') -%}\n {%- set full_refresh_mode = (should_full_refresh() or existing_relation.is_view) -%}\n {%- set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') -%}\n\n -- the temp_ and backup_ relations should not already exist in the database; get_relation\n -- will return None in that case. Otherwise, we get a relation that we can drop\n -- later, before we try to use this name for the current operation. 
This has to happen before\n -- BEGIN, in a separate transaction\n {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation)-%}\n {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%}\n -- grab current tables grants config for comparision later on\n {% set grant_config = config.get('grants') %}\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {% set to_drop = [] %}\n\n {% if existing_relation is none %}\n {% set build_sql = create_table_as(False, target_relation, compiled_code, language) %}\n {% elif full_refresh_mode %}\n {% set build_sql = create_table_as(False, intermediate_relation, compiled_code, language) %}\n {% set need_swap = true %}\n {% else %}\n {% if not temporary %}\n -- if not using a temporary table we will update the temp relation to use a different temp schema (\"dbt_temp\" by default)\n {% set temp_relation = temp_relation.incorporate(path=adapter.get_temp_relation_path(this)) %}\n {% do run_query(create_schema(temp_relation)) %}\n -- then drop the temp relation after we insert the incremental data into the target relation\n {% do to_drop.append(temp_relation) %}\n {% endif %}\n {% if language == 'python' %}\n {% set build_python = create_table_as(False, temp_relation, compiled_code, language) %}\n {% call statement(\"pre\", language=language) %}\n {{- build_python }}\n {% endcall %}\n {% else %} {# SQL #}\n {% do run_query(create_table_as(temporary, temp_relation, compiled_code, language)) %}\n {% endif %}\n {% do adapter.expand_target_column_types(\n from_relation=temp_relation,\n to_relation=target_relation) %}\n {#-- Process schema changes. Returns dict of changes if successful. 
Use source columns for upserting/merging --#}\n {% set dest_columns = process_schema_changes(on_schema_change, temp_relation, existing_relation) %}\n {% if not dest_columns %}\n {% set dest_columns = adapter.get_columns_in_relation(existing_relation) %}\n {% endif %}\n\n {#-- Get the incremental_strategy, the macro to use for the strategy, and build the sql --#}\n {% set incremental_strategy = config.get('incremental_strategy') or 'default' %}\n {% set incremental_predicates = config.get('predicates', none) or config.get('incremental_predicates', none) %}\n {% set strategy_sql_macro_func = adapter.get_incremental_strategy_macro(context, incremental_strategy) %}\n {% set strategy_arg_dict = ({'target_relation': target_relation, 'temp_relation': temp_relation, 'unique_key': unique_key, 'dest_columns': dest_columns, 'incremental_predicates': incremental_predicates }) %}\n {% set build_sql = strategy_sql_macro_func(strategy_arg_dict) %}\n {% set language = \"sql\" %}\n\n {% endif %}\n\n {% call statement(\"main\", language=language) %}\n {{- build_sql }}\n {% endcall %}\n\n {% if need_swap %}\n {% do adapter.rename_relation(target_relation, backup_relation) %}\n {% do adapter.rename_relation(intermediate_relation, target_relation) %}\n {% do to_drop.append(backup_relation) %}\n {% endif %}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {% if existing_relation is none or existing_relation.is_view or should_full_refresh() %}\n {% do create_indexes(target_relation) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n -- `COMMIT` happens here\n {% do adapter.commit() %}\n\n {% for rel in to_drop %}\n {% do adapter.drop_relation(rel) %}\n {% endfor %}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{%- endmaterialization %}", + "depends_on": { + "macros": [ + "macro.dbt.load_cached_relation", + "macro.dbt.make_temp_relation", + "macro.dbt.make_intermediate_relation", + "macro.dbt.make_backup_relation", + "macro.dbt.should_full_refresh", + "macro.dbt.incremental_validate_on_schema_change", + "macro.dbt.drop_relation_if_exists", + "macro.dbt.run_hooks", + "macro.dbt.create_table_as", + "macro.dbt.run_query", + "macro.dbt.create_schema", + "macro.dbt.statement", + "macro.dbt.process_schema_changes", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants", + "macro.dbt.persist_docs", + "macro.dbt.create_indexes" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9566796, + "supported_languages": ["sql", "python"] + }, + "macro.dbt_duckdb.materialization_table_duckdb": { + "name": "materialization_table_duckdb", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/materializations/table.sql", + "original_file_path": "macros/materializations/table.sql", + "unique_id": "macro.dbt_duckdb.materialization_table_duckdb", + "macro_sql": "{% materialization table, adapter=\"duckdb\", supported_languages=['sql', 'python'] %}\n\n {%- set language = model['language'] -%}\n\n {%- set existing_relation = load_cached_relation(this) -%}\n {%- set target_relation = this.incorporate(type='table') %}\n {%- set intermediate_relation = make_intermediate_relation(target_relation) -%}\n -- the intermediate_relation should not 
already exist in the database; get_relation\n -- will return None in that case. Otherwise, we get a relation that we can drop\n -- later, before we try to use this name for the current operation\n {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%}\n /*\n See ../view/view.sql for more information about this relation.\n */\n {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%}\n {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}\n -- as above, the backup_relation should not already exist\n {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%}\n -- grab current tables grants config for comparision later on\n {% set grant_config = config.get('grants') %}\n\n -- drop the temp relations if they exist already in the database\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% call statement('main', language=language) -%}\n {{- create_table_as(False, intermediate_relation, compiled_code, language) }}\n {%- endcall %}\n\n -- cleanup\n {% if existing_relation is not none %}\n {{ adapter.rename_relation(existing_relation, backup_relation) }}\n {% endif %}\n\n {{ adapter.rename_relation(intermediate_relation, target_relation) }}\n\n {% do create_indexes(target_relation) %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n -- `COMMIT` happens here\n {{ adapter.commit() }}\n\n -- finally, drop the existing/backup relation after the commit\n {{ drop_relation_if_exists(backup_relation) }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n{% endmaterialization %}", + "depends_on": { + "macros": [ + "macro.dbt.load_cached_relation", + "macro.dbt.make_intermediate_relation", + "macro.dbt.make_backup_relation", + "macro.dbt.drop_relation_if_exists", + "macro.dbt.run_hooks", + "macro.dbt.statement", + "macro.dbt.create_table_as", + "macro.dbt.create_indexes", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants", + "macro.dbt.persist_docs" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9604917, + "supported_languages": ["sql", "python"] + }, + "macro.dbt_duckdb.duckdb__listagg": { + "name": "duckdb__listagg", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/utils/listagg.sql", + "original_file_path": "macros/utils/listagg.sql", + "unique_id": "macro.dbt_duckdb.duckdb__listagg", + "macro_sql": "{% macro duckdb__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}\n {% if limit_num -%}\n list_aggr(\n (array_agg(\n {{ measure }}\n {% if order_by_clause -%}\n {{ order_by_clause }}\n {%- endif %}\n ))[1:{{ limit_num }}],\n 'string_agg',\n {{ delimiter_text }}\n )\n {%- else %}\n string_agg(\n {{ measure }},\n {{ delimiter_text }}\n {% if order_by_clause -%}\n {{ order_by_clause }}\n {%- endif %}\n )\n {%- endif %}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + 
"description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9613912, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__split_part": { + "name": "duckdb__split_part", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/utils/splitpart.sql", + "original_file_path": "macros/utils/splitpart.sql", + "unique_id": "macro.dbt_duckdb.duckdb__split_part", + "macro_sql": "{% macro duckdb__split_part(string_text, delimiter_text, part_number) %}\n string_split({{ string_text }}, {{ delimiter_text }})[ {{ part_number }} ]\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9617295, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__datediff": { + "name": "duckdb__datediff", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/utils/datediff.sql", + "original_file_path": "macros/utils/datediff.sql", + "unique_id": "macro.dbt_duckdb.duckdb__datediff", + "macro_sql": "{% macro duckdb__datediff(first_date, second_date, datepart) -%}\n date_diff('{{ datepart }}', {{ first_date }}::timestamp, {{ second_date}}::timestamp )\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9620335, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__any_value": { + "name": "duckdb__any_value", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/utils/any_value.sql", + "original_file_path": "macros/utils/any_value.sql", + "unique_id": "macro.dbt_duckdb.duckdb__any_value", + "macro_sql": "{% macro duckdb__any_value(expression) -%}\n\n arbitrary({{ expression }})\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9622262, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__dateadd": { + "name": "duckdb__dateadd", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/utils/dateadd.sql", + "original_file_path": "macros/utils/dateadd.sql", + "unique_id": "macro.dbt_duckdb.duckdb__dateadd", + "macro_sql": "{% macro duckdb__dateadd(datepart, interval, from_date_or_timestamp) %}\n\n date_add({{ from_date_or_timestamp }}, interval ({{ interval }}) {{ datepart }})\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9625213, + "supported_languages": null + }, + "macro.dbt_duckdb.duckdb__last_day": { + "name": "duckdb__last_day", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/utils/lastday.sql", + "original_file_path": "macros/utils/lastday.sql", + "unique_id": "macro.dbt_duckdb.duckdb__last_day", + "macro_sql": "{% macro duckdb__last_day(date, datepart) -%}\n\n {%- if datepart == 'quarter' -%}\n -- duckdb dateadd does not support quarter interval.\n cast(\n {{dbt.dateadd('day', '-1',\n dbt.dateadd('month', '3', dbt.date_trunc(datepart, date))\n )}}\n as date)\n {%- else -%}\n {{dbt.default_last_day(date, datepart)}}\n {%- endif 
-%}\n\n{%- endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.dateadd", + "macro.dbt.date_trunc", + "macro.dbt.default_last_day" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9632158, + "supported_languages": null + }, + "macro.dbt_duckdb.register_upstream_external_models": { + "name": "register_upstream_external_models", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/utils/upstream.sql", + "original_file_path": "macros/utils/upstream.sql", + "unique_id": "macro.dbt_duckdb.register_upstream_external_models", + "macro_sql": "{%- macro register_upstream_external_models() -%}\n{% if execute %}\n{% set upstream_nodes = {} %}\n{% set upstream_schemas = {} %}\n{% for node in selected_resources %}\n {% for upstream_node in graph['nodes'][node]['depends_on']['nodes'] %}\n {% if upstream_node not in upstream_nodes and upstream_node not in selected_resources %}\n {% do upstream_nodes.update({upstream_node: None}) %}\n {% set upstream = graph['nodes'].get(upstream_node) %}\n {% if upstream\n and upstream.resource_type in ('model', 'seed')\n and upstream.config.materialized=='external'\n %}\n {%- set upstream_rel = api.Relation.create(\n database=upstream['database'],\n schema=upstream['schema'],\n identifier=upstream['alias']\n ) -%}\n {%- set location = upstream.config.get('location', external_location(upstream_rel, upstream.config)) -%}\n {%- set rendered_options = render_write_options(upstream.config) -%}\n {%- set upstream_location = adapter.external_read_location(location, rendered_options) -%}\n {% if upstream_rel.schema not in upstream_schemas %}\n {% call statement('main', language='sql') -%}\n create schema if not exists {{ upstream_rel.schema }}\n {%- endcall %}\n {% do upstream_schemas.update({upstream_rel.schema: None}) %}\n {% endif %}\n {% call statement('main', language='sql') -%}\n create or replace view {{ upstream_rel }} as (\n select * from '{{ upstream_location }}'\n );\n {%- endcall %}\n {%- endif %}\n {% endif %}\n {% endfor %}\n{% endfor %}\n{% do adapter.commit() %}\n{% endif %}\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt_duckdb.external_location", + "macro.dbt_duckdb.render_write_options", + "macro.dbt.statement" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9666445, + "supported_languages": null + }, + "macro.dbt_duckdb.external_location": { + "name": "external_location", + "resource_type": "macro", + "package_name": "dbt_duckdb", + "path": "macros/utils/external_location.sql", + "original_file_path": "macros/utils/external_location.sql", + "unique_id": "macro.dbt_duckdb.external_location", + "macro_sql": "{%- macro external_location(relation, config) -%}\n {%- if config.get('options', {}).get('partition_by') is none -%}\n {%- set format = config.get('format', 'parquet') -%}\n {{- adapter.external_root() }}/{{ relation.identifier }}.{{ format }}\n {%- else -%}\n {{- adapter.external_root() }}/{{ relation.identifier }}\n {%- endif -%}\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9675128, + "supported_languages": null + }, + "macro.dbt.resolve_model_name": { + "name": "resolve_model_name", + "resource_type": 
"macro", + "package_name": "dbt", + "path": "macros/python_model/python.sql", + "original_file_path": "macros/python_model/python.sql", + "unique_id": "macro.dbt.resolve_model_name", + "macro_sql": "{% macro resolve_model_name(input_model_name) %}\n {{ return(adapter.dispatch('resolve_model_name', 'dbt')(input_model_name)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__resolve_model_name"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9693503, + "supported_languages": null + }, + "macro.dbt.default__resolve_model_name": { + "name": "default__resolve_model_name", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/python_model/python.sql", + "original_file_path": "macros/python_model/python.sql", + "unique_id": "macro.dbt.default__resolve_model_name", + "macro_sql": "\n\n{%- macro default__resolve_model_name(input_model_name) -%}\n {{ input_model_name | string | replace('\"', '\\\"') }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.969627, + "supported_languages": null + }, + "macro.dbt.build_ref_function": { + "name": "build_ref_function", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/python_model/python.sql", + "original_file_path": "macros/python_model/python.sql", + "unique_id": "macro.dbt.build_ref_function", + "macro_sql": "{% macro build_ref_function(model) %}\n\n {%- set ref_dict = {} -%}\n {%- for _ref in model.refs -%}\n {% set _ref_args = [_ref.get('package'), _ref['name']] if _ref.get('package') else [_ref['name'],] %}\n {%- set resolved = ref(*_ref_args, v=_ref.get('version')) -%}\n {%- if _ref.get('version') -%}\n {% do _ref_args.extend([\"v\" ~ _ref['version']]) %}\n {%- endif -%}\n {%- do ref_dict.update({_ref_args | join('.'): resolve_model_name(resolved)}) -%}\n {%- endfor -%}\n\ndef ref(*args, **kwargs):\n refs = {{ ref_dict | tojson }}\n key = '.'.join(args)\n version = kwargs.get(\"v\") or kwargs.get(\"version\")\n if version:\n key += f\".v{version}\"\n dbt_load_df_function = kwargs.get(\"dbt_load_df_function\")\n return dbt_load_df_function(refs[key])\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.resolve_model_name"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.970907, + "supported_languages": null + }, + "macro.dbt.build_source_function": { + "name": "build_source_function", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/python_model/python.sql", + "original_file_path": "macros/python_model/python.sql", + "unique_id": "macro.dbt.build_source_function", + "macro_sql": "{% macro build_source_function(model) %}\n\n {%- set source_dict = {} -%}\n {%- for _source in model.sources -%}\n {%- set resolved = source(*_source) -%}\n {%- do source_dict.update({_source | join('.'): resolve_model_name(resolved)}) -%}\n {%- endfor -%}\n\ndef source(*args, dbt_load_df_function):\n sources = {{ source_dict | tojson }}\n key = '.'.join(args)\n return dbt_load_df_function(sources[key])\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.resolve_model_name"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, 
+ "arguments": [], + "created_at": 1711458071.9715781, + "supported_languages": null + }, + "macro.dbt.build_config_dict": { + "name": "build_config_dict", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/python_model/python.sql", + "original_file_path": "macros/python_model/python.sql", + "unique_id": "macro.dbt.build_config_dict", + "macro_sql": "{% macro build_config_dict(model) %}\n {%- set config_dict = {} -%}\n {% set config_dbt_used = zip(model.config.config_keys_used, model.config.config_keys_defaults) | list %}\n {%- for key, default in config_dbt_used -%}\n {# weird type testing with enum, would be much easier to write this logic in Python! #}\n {%- if key == \"language\" -%}\n {%- set value = \"python\" -%}\n {%- endif -%}\n {%- set value = model.config.get(key, default) -%}\n {%- do config_dict.update({key: value}) -%}\n {%- endfor -%}\nconfig_dict = {{ config_dict }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.972495, + "supported_languages": null + }, + "macro.dbt.py_script_postfix": { + "name": "py_script_postfix", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/python_model/python.sql", + "original_file_path": "macros/python_model/python.sql", + "unique_id": "macro.dbt.py_script_postfix", + "macro_sql": "{% macro py_script_postfix(model) %}\n# This part is user provided model code\n# you will need to copy the next section to run the code\n# COMMAND ----------\n# this part is dbt logic for get ref work, do not modify\n\n{{ build_ref_function(model ) }}\n{{ build_source_function(model ) }}\n{{ build_config_dict(model) }}\n\nclass config:\n def __init__(self, *args, **kwargs):\n pass\n\n @staticmethod\n def get(key, default=None):\n return config_dict.get(key, default)\n\nclass this:\n \"\"\"dbt.this() or dbt.this.identifier\"\"\"\n database = \"{{ this.database }}\"\n schema = \"{{ this.schema }}\"\n identifier = \"{{ this.identifier }}\"\n {% set this_relation_name = resolve_model_name(this) %}\n def __repr__(self):\n return '{{ this_relation_name }}'\n\n\nclass dbtObj:\n def __init__(self, load_df_function) -> None:\n self.source = lambda *args: source(*args, dbt_load_df_function=load_df_function)\n self.ref = lambda *args, **kwargs: ref(*args, **kwargs, dbt_load_df_function=load_df_function)\n self.config = config\n self.this = this()\n self.is_incremental = {{ is_incremental() }}\n\n# COMMAND ----------\n{{py_script_comment()}}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.build_ref_function", + "macro.dbt.build_source_function", + "macro.dbt.build_config_dict", + "macro.dbt.resolve_model_name", + "macro.dbt.is_incremental", + "macro.dbt.py_script_comment" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.973296, + "supported_languages": null + }, + "macro.dbt.py_script_comment": { + "name": "py_script_comment", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/python_model/python.sql", + "original_file_path": "macros/python_model/python.sql", + "unique_id": "macro.dbt.py_script_comment", + "macro_sql": "{%macro py_script_comment()%}\n{%endmacro%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": 
[], + "created_at": 1711458071.9734125, + "supported_languages": null + }, + "macro.dbt.get_columns_in_relation": { + "name": "get_columns_in_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.get_columns_in_relation", + "macro_sql": "{% macro get_columns_in_relation(relation) -%}\n {{ return(adapter.dispatch('get_columns_in_relation', 'dbt')(relation)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__get_columns_in_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9758894, + "supported_languages": null + }, + "macro.dbt.default__get_columns_in_relation": { + "name": "default__get_columns_in_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.default__get_columns_in_relation", + "macro_sql": "{% macro default__get_columns_in_relation(relation) -%}\n {{ exceptions.raise_not_implemented(\n 'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9761302, + "supported_languages": null + }, + "macro.dbt.sql_convert_columns_in_relation": { + "name": "sql_convert_columns_in_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.sql_convert_columns_in_relation", + "macro_sql": "{% macro sql_convert_columns_in_relation(table) -%}\n {% set columns = [] %}\n {% for row in table %}\n {% do columns.append(api.Column(*row)) %}\n {% endfor %}\n {{ return(columns) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9765928, + "supported_languages": null + }, + "macro.dbt.get_empty_subquery_sql": { + "name": "get_empty_subquery_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.get_empty_subquery_sql", + "macro_sql": "{% macro get_empty_subquery_sql(select_sql, select_sql_header=none) -%}\n {{ return(adapter.dispatch('get_empty_subquery_sql', 'dbt')(select_sql, select_sql_header)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_empty_subquery_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9769258, + "supported_languages": null + }, + "macro.dbt.default__get_empty_subquery_sql": { + "name": "default__get_empty_subquery_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.default__get_empty_subquery_sql", + "macro_sql": "{% macro default__get_empty_subquery_sql(select_sql, select_sql_header=none) %}\n {%- if select_sql_header is not none -%}\n {{ select_sql_header }}\n {%- 
endif -%}\n select * from (\n {{ select_sql }}\n ) as __dbt_sbq\n where false\n limit 0\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9772277, + "supported_languages": null + }, + "macro.dbt.get_empty_schema_sql": { + "name": "get_empty_schema_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.get_empty_schema_sql", + "macro_sql": "{% macro get_empty_schema_sql(columns) -%}\n {{ return(adapter.dispatch('get_empty_schema_sql', 'dbt')(columns)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_empty_schema_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9775486, + "supported_languages": null + }, + "macro.dbt.default__get_empty_schema_sql": { + "name": "default__get_empty_schema_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.default__get_empty_schema_sql", + "macro_sql": "{% macro default__get_empty_schema_sql(columns) %}\n {%- set col_err = [] -%}\n {%- set col_naked_numeric = [] -%}\n select\n {% for i in columns %}\n {%- set col = columns[i] -%}\n {%- if col['data_type'] is not defined -%}\n {%- do col_err.append(col['name']) -%}\n {#-- If this column's type is just 'numeric' then it is missing precision/scale, raise a warning --#}\n {%- elif col['data_type'].strip().lower() in ('numeric', 'decimal', 'number') -%}\n {%- do col_naked_numeric.append(col['name']) -%}\n {%- endif -%}\n {% set col_name = adapter.quote(col['name']) if col.get('quote') else col['name'] %}\n cast(null as {{ col['data_type'] }}) as {{ col_name }}{{ \", \" if not loop.last }}\n {%- endfor -%}\n {%- if (col_err | length) > 0 -%}\n {{ exceptions.column_type_missing(column_names=col_err) }}\n {%- elif (col_naked_numeric | length) > 0 -%}\n {{ exceptions.warn(\"Detected columns with numeric type and unspecified precision/scale, this can lead to unintended rounding: \" ~ col_naked_numeric ~ \"`\") }}\n {%- endif -%}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9792736, + "supported_languages": null + }, + "macro.dbt.get_column_schema_from_query": { + "name": "get_column_schema_from_query", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.get_column_schema_from_query", + "macro_sql": "{% macro get_column_schema_from_query(select_sql, select_sql_header=none) -%}\n {% set columns = [] %}\n {# -- Using an 'empty subquery' here to get the same schema as the given select_sql statement, without necessitating a data scan.#}\n {% set sql = get_empty_subquery_sql(select_sql, select_sql_header) %}\n {% set column_schema = adapter.get_column_schema_from_query(sql) %}\n {{ return(column_schema) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_empty_subquery_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + 
"patch_path": null, + "arguments": [], + "created_at": 1711458071.9798021, + "supported_languages": null + }, + "macro.dbt.get_columns_in_query": { + "name": "get_columns_in_query", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.get_columns_in_query", + "macro_sql": "{% macro get_columns_in_query(select_sql) -%}\n {{ return(adapter.dispatch('get_columns_in_query', 'dbt')(select_sql)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_columns_in_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9801123, + "supported_languages": null + }, + "macro.dbt.default__get_columns_in_query": { + "name": "default__get_columns_in_query", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.default__get_columns_in_query", + "macro_sql": "{% macro default__get_columns_in_query(select_sql) %}\n {% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}\n {{ get_empty_subquery_sql(select_sql) }}\n {% endcall %}\n {{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement", "macro.dbt.get_empty_subquery_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9806292, + "supported_languages": null + }, + "macro.dbt.alter_column_type": { + "name": "alter_column_type", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.alter_column_type", + "macro_sql": "{% macro alter_column_type(relation, column_name, new_column_type) -%}\n {{ return(adapter.dispatch('alter_column_type', 'dbt')(relation, column_name, new_column_type)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__alter_column_type"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.981003, + "supported_languages": null + }, + "macro.dbt.default__alter_column_type": { + "name": "default__alter_column_type", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.default__alter_column_type", + "macro_sql": "{% macro default__alter_column_type(relation, column_name, new_column_type) -%}\n {#\n 1. Create a new column (w/ temp name and correct type)\n 2. Copy data over to it\n 3. Drop the existing column (cascade!)\n 4. 
Rename the new column to existing column\n #}\n {%- set tmp_column = column_name + \"__dbt_alter\" -%}\n\n {% call statement('alter_column_type') %}\n alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};\n update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};\n alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;\n alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}\n {% endcall %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.981881, + "supported_languages": null + }, + "macro.dbt.alter_relation_add_remove_columns": { + "name": "alter_relation_add_remove_columns", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.alter_relation_add_remove_columns", + "macro_sql": "{% macro alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}\n {{ return(adapter.dispatch('alter_relation_add_remove_columns', 'dbt')(relation, add_columns, remove_columns)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__alter_relation_add_remove_columns"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.98225, + "supported_languages": null + }, + "macro.dbt.default__alter_relation_add_remove_columns": { + "name": "default__alter_relation_add_remove_columns", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/columns.sql", + "original_file_path": "macros/adapters/columns.sql", + "unique_id": "macro.dbt.default__alter_relation_add_remove_columns", + "macro_sql": "{% macro default__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}\n\n {% if add_columns is none %}\n {% set add_columns = [] %}\n {% endif %}\n {% if remove_columns is none %}\n {% set remove_columns = [] %}\n {% endif %}\n\n {% set sql -%}\n\n alter {{ relation.type }} {{ relation }}\n\n {% for column in add_columns %}\n add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}\n {% endfor %}{{ ',' if add_columns and remove_columns }}\n\n {% for column in remove_columns %}\n drop column {{ column.name }}{{ ',' if not loop.last }}\n {% endfor %}\n\n {%- endset -%}\n\n {% do run_query(sql) %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.983509, + "supported_languages": null + }, + "macro.dbt.make_intermediate_relation": { + "name": "make_intermediate_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.make_intermediate_relation", + "macro_sql": "{% macro make_intermediate_relation(base_relation, suffix='__dbt_tmp') %}\n {{ return(adapter.dispatch('make_intermediate_relation', 'dbt')(base_relation, suffix)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__make_intermediate_relation"] + }, + 
"description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.98564, + "supported_languages": null + }, + "macro.dbt.default__make_intermediate_relation": { + "name": "default__make_intermediate_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.default__make_intermediate_relation", + "macro_sql": "{% macro default__make_intermediate_relation(base_relation, suffix) %}\n {{ return(default__make_temp_relation(base_relation, suffix)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__make_temp_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9858835, + "supported_languages": null + }, + "macro.dbt.make_temp_relation": { + "name": "make_temp_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.make_temp_relation", + "macro_sql": "{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}\n {{ return(adapter.dispatch('make_temp_relation', 'dbt')(base_relation, suffix)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__make_temp_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.98626, + "supported_languages": null + }, + "macro.dbt.default__make_temp_relation": { + "name": "default__make_temp_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.default__make_temp_relation", + "macro_sql": "{% macro default__make_temp_relation(base_relation, suffix) %}\n {%- set temp_identifier = base_relation.identifier ~ suffix -%}\n {%- set temp_relation = base_relation.incorporate(\n path={\"identifier\": temp_identifier}) -%}\n\n {{ return(temp_relation) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9867983, + "supported_languages": null + }, + "macro.dbt.make_backup_relation": { + "name": "make_backup_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.make_backup_relation", + "macro_sql": "{% macro make_backup_relation(base_relation, backup_relation_type, suffix='__dbt_backup') %}\n {{ return(adapter.dispatch('make_backup_relation', 'dbt')(base_relation, backup_relation_type, suffix)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__make_backup_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9872046, + "supported_languages": null + }, + "macro.dbt.default__make_backup_relation": { + "name": "default__make_backup_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + 
"unique_id": "macro.dbt.default__make_backup_relation", + "macro_sql": "{% macro default__make_backup_relation(base_relation, backup_relation_type, suffix) %}\n {%- set backup_identifier = base_relation.identifier ~ suffix -%}\n {%- set backup_relation = base_relation.incorporate(\n path={\"identifier\": backup_identifier},\n type=backup_relation_type\n ) -%}\n {{ return(backup_relation) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9877315, + "supported_languages": null + }, + "macro.dbt.truncate_relation": { + "name": "truncate_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.truncate_relation", + "macro_sql": "{% macro truncate_relation(relation) -%}\n {{ return(adapter.dispatch('truncate_relation', 'dbt')(relation)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__truncate_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.988048, + "supported_languages": null + }, + "macro.dbt.default__truncate_relation": { + "name": "default__truncate_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.default__truncate_relation", + "macro_sql": "{% macro default__truncate_relation(relation) -%}\n {% call statement('truncate_relation') -%}\n truncate table {{ relation }}\n {%- endcall %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9882963, + "supported_languages": null + }, + "macro.dbt.get_or_create_relation": { + "name": "get_or_create_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.get_or_create_relation", + "macro_sql": "{% macro get_or_create_relation(database, schema, identifier, type) -%}\n {{ return(adapter.dispatch('get_or_create_relation', 'dbt')(database, schema, identifier, type)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_or_create_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9886632, + "supported_languages": null + }, + "macro.dbt.default__get_or_create_relation": { + "name": "default__get_or_create_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.default__get_or_create_relation", + "macro_sql": "{% macro default__get_or_create_relation(database, schema, identifier, type) %}\n {%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}\n\n {% if target_relation %}\n {% do return([true, target_relation]) %}\n {% endif %}\n\n {%- set new_relation = api.Relation.create(\n database=database,\n schema=schema,\n identifier=identifier,\n type=type\n ) 
-%}\n {% do return([false, new_relation]) %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9894717, + "supported_languages": null + }, + "macro.dbt.load_cached_relation": { + "name": "load_cached_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.load_cached_relation", + "macro_sql": "{% macro load_cached_relation(relation) %}\n {% do return(adapter.get_relation(\n database=relation.database,\n schema=relation.schema,\n identifier=relation.identifier\n )) -%}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9898617, + "supported_languages": null + }, + "macro.dbt.load_relation": { + "name": "load_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/relation.sql", + "original_file_path": "macros/adapters/relation.sql", + "unique_id": "macro.dbt.load_relation", + "macro_sql": "{% macro load_relation(relation) %}\n {{ return(load_cached_relation(relation)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.load_cached_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9900863, + "supported_languages": null + }, + "macro.dbt.alter_column_comment": { + "name": "alter_column_comment", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/persist_docs.sql", + "original_file_path": "macros/adapters/persist_docs.sql", + "unique_id": "macro.dbt.alter_column_comment", + "macro_sql": "{% macro alter_column_comment(relation, column_dict) -%}\n {{ return(adapter.dispatch('alter_column_comment', 'dbt')(relation, column_dict)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__alter_column_comment"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9909303, + "supported_languages": null + }, + "macro.dbt.default__alter_column_comment": { + "name": "default__alter_column_comment", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/persist_docs.sql", + "original_file_path": "macros/adapters/persist_docs.sql", + "unique_id": "macro.dbt.default__alter_column_comment", + "macro_sql": "{% macro default__alter_column_comment(relation, column_dict) -%}\n {{ exceptions.raise_not_implemented(\n 'alter_column_comment macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9911819, + "supported_languages": null + }, + "macro.dbt.alter_relation_comment": { + "name": "alter_relation_comment", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/persist_docs.sql", + "original_file_path": "macros/adapters/persist_docs.sql", + "unique_id": "macro.dbt.alter_relation_comment", + "macro_sql": "{% macro alter_relation_comment(relation, relation_comment) -%}\n {{ 
return(adapter.dispatch('alter_relation_comment', 'dbt')(relation, relation_comment)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__alter_relation_comment"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9915135, + "supported_languages": null + }, + "macro.dbt.default__alter_relation_comment": { + "name": "default__alter_relation_comment", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/persist_docs.sql", + "original_file_path": "macros/adapters/persist_docs.sql", + "unique_id": "macro.dbt.default__alter_relation_comment", + "macro_sql": "{% macro default__alter_relation_comment(relation, relation_comment) -%}\n {{ exceptions.raise_not_implemented(\n 'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9917657, + "supported_languages": null + }, + "macro.dbt.persist_docs": { + "name": "persist_docs", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/persist_docs.sql", + "original_file_path": "macros/adapters/persist_docs.sql", + "unique_id": "macro.dbt.persist_docs", + "macro_sql": "{% macro persist_docs(relation, model, for_relation=true, for_columns=true) -%}\n {{ return(adapter.dispatch('persist_docs', 'dbt')(relation, model, for_relation, for_columns)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__persist_docs"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9921472, + "supported_languages": null + }, + "macro.dbt.default__persist_docs": { + "name": "default__persist_docs", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/persist_docs.sql", + "original_file_path": "macros/adapters/persist_docs.sql", + "unique_id": "macro.dbt.default__persist_docs", + "macro_sql": "{% macro default__persist_docs(relation, model, for_relation, for_columns) -%}\n {% if for_relation and config.persist_relation_docs() and model.description %}\n {% do run_query(alter_relation_comment(relation, model.description)) %}\n {% endif %}\n\n {% if for_columns and config.persist_column_docs() and model.columns %}\n {% do run_query(alter_column_comment(relation, model.columns)) %}\n {% endif %}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.run_query", + "macro.dbt.alter_relation_comment", + "macro.dbt.alter_column_comment" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9929318, + "supported_languages": null + }, + "macro.dbt.collect_freshness": { + "name": "collect_freshness", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/freshness.sql", + "original_file_path": "macros/adapters/freshness.sql", + "unique_id": "macro.dbt.collect_freshness", + "macro_sql": "{% macro collect_freshness(source, loaded_at_field, filter) %}\n {{ return(adapter.dispatch('collect_freshness', 'dbt')(source, loaded_at_field, filter))}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__collect_freshness"] + }, + "description": "", + "meta": {}, + "docs": { + 
"show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9935231, + "supported_languages": null + }, + "macro.dbt.default__collect_freshness": { + "name": "default__collect_freshness", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/freshness.sql", + "original_file_path": "macros/adapters/freshness.sql", + "unique_id": "macro.dbt.default__collect_freshness", + "macro_sql": "{% macro default__collect_freshness(source, loaded_at_field, filter) %}\n {% call statement('collect_freshness', fetch_result=True, auto_begin=False) -%}\n select\n max({{ loaded_at_field }}) as max_loaded_at,\n {{ current_timestamp() }} as snapshotted_at\n from {{ source }}\n {% if filter %}\n where {{ filter }}\n {% endif %}\n {% endcall %}\n {{ return(load_result('collect_freshness')) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement", "macro.dbt.current_timestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9941256, + "supported_languages": null + }, + "macro.dbt.copy_grants": { + "name": "copy_grants", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.copy_grants", + "macro_sql": "{% macro copy_grants() %}\n {{ return(adapter.dispatch('copy_grants', 'dbt')()) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__copy_grants"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.996004, + "supported_languages": null + }, + "macro.dbt.default__copy_grants": { + "name": "default__copy_grants", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.default__copy_grants", + "macro_sql": "{% macro default__copy_grants() %}\n {{ return(True) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.996179, + "supported_languages": null + }, + "macro.dbt.support_multiple_grantees_per_dcl_statement": { + "name": "support_multiple_grantees_per_dcl_statement", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.support_multiple_grantees_per_dcl_statement", + "macro_sql": "{% macro support_multiple_grantees_per_dcl_statement() %}\n {{ return(adapter.dispatch('support_multiple_grantees_per_dcl_statement', 'dbt')()) }}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.default__support_multiple_grantees_per_dcl_statement" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.996425, + "supported_languages": null + }, + "macro.dbt.default__support_multiple_grantees_per_dcl_statement": { + "name": "default__support_multiple_grantees_per_dcl_statement", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + 
"unique_id": "macro.dbt.default__support_multiple_grantees_per_dcl_statement", + "macro_sql": "\n\n{%- macro default__support_multiple_grantees_per_dcl_statement() -%}\n {{ return(True) }}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9966264, + "supported_languages": null + }, + "macro.dbt.should_revoke": { + "name": "should_revoke", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.should_revoke", + "macro_sql": "{% macro should_revoke(existing_relation, full_refresh_mode=True) %}\n\n {% if not existing_relation %}\n {#-- The table doesn't already exist, so no grants to copy over --#}\n {{ return(False) }}\n {% elif full_refresh_mode %}\n {#-- The object is being REPLACED -- whether grants are copied over depends on the value of user config --#}\n {{ return(copy_grants()) }}\n {% else %}\n {#-- The table is being merged/upserted/inserted -- grants will be carried over --#}\n {{ return(True) }}\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.copy_grants"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9971359, + "supported_languages": null + }, + "macro.dbt.get_show_grant_sql": { + "name": "get_show_grant_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.get_show_grant_sql", + "macro_sql": "{% macro get_show_grant_sql(relation) %}\n {{ return(adapter.dispatch(\"get_show_grant_sql\", \"dbt\")(relation)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_show_grant_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9974346, + "supported_languages": null + }, + "macro.dbt.default__get_show_grant_sql": { + "name": "default__get_show_grant_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.default__get_show_grant_sql", + "macro_sql": "{% macro default__get_show_grant_sql(relation) %}\n show grants on {{ relation }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9975927, + "supported_languages": null + }, + "macro.dbt.get_grant_sql": { + "name": "get_grant_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.get_grant_sql", + "macro_sql": "{% macro get_grant_sql(relation, privilege, grantees) %}\n {{ return(adapter.dispatch('get_grant_sql', 'dbt')(relation, privilege, grantees)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_grant_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.997924, + 
"supported_languages": null + }, + "macro.dbt.default__get_grant_sql": { + "name": "default__get_grant_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.default__get_grant_sql", + "macro_sql": "\n\n{%- macro default__get_grant_sql(relation, privilege, grantees) -%}\n grant {{ privilege }} on {{ relation }} to {{ grantees | join(', ') }}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9982288, + "supported_languages": null + }, + "macro.dbt.get_revoke_sql": { + "name": "get_revoke_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.get_revoke_sql", + "macro_sql": "{% macro get_revoke_sql(relation, privilege, grantees) %}\n {{ return(adapter.dispatch('get_revoke_sql', 'dbt')(relation, privilege, grantees)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_revoke_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9985805, + "supported_languages": null + }, + "macro.dbt.default__get_revoke_sql": { + "name": "default__get_revoke_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.default__get_revoke_sql", + "macro_sql": "\n\n{%- macro default__get_revoke_sql(relation, privilege, grantees) -%}\n revoke {{ privilege }} on {{ relation }} from {{ grantees | join(', ') }}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9989767, + "supported_languages": null + }, + "macro.dbt.get_dcl_statement_list": { + "name": "get_dcl_statement_list", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.get_dcl_statement_list", + "macro_sql": "{% macro get_dcl_statement_list(relation, grant_config, get_dcl_macro) %}\n {{ return(adapter.dispatch('get_dcl_statement_list', 'dbt')(relation, grant_config, get_dcl_macro)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_dcl_statement_list"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458071.9993153, + "supported_languages": null + }, + "macro.dbt.default__get_dcl_statement_list": { + "name": "default__get_dcl_statement_list", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.default__get_dcl_statement_list", + "macro_sql": "\n\n{%- macro default__get_dcl_statement_list(relation, grant_config, get_dcl_macro) -%}\n {#\n -- Unpack grant_config into specific privileges and the set of users who need them granted/revoked.\n -- Depending on whether this database supports multiple grantees 
per statement, pass in the list of\n -- all grantees per privilege, or (if not) template one statement per privilege-grantee pair.\n -- `get_dcl_macro` will be either `get_grant_sql` or `get_revoke_sql`\n #}\n {%- set dcl_statements = [] -%}\n {%- for privilege, grantees in grant_config.items() %}\n {%- if support_multiple_grantees_per_dcl_statement() and grantees -%}\n {%- set dcl = get_dcl_macro(relation, privilege, grantees) -%}\n {%- do dcl_statements.append(dcl) -%}\n {%- else -%}\n {%- for grantee in grantees -%}\n {% set dcl = get_dcl_macro(relation, privilege, [grantee]) %}\n {%- do dcl_statements.append(dcl) -%}\n {% endfor -%}\n {%- endif -%}\n {%- endfor -%}\n {{ return(dcl_statements) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.support_multiple_grantees_per_dcl_statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.000666, + "supported_languages": null + }, + "macro.dbt.call_dcl_statements": { + "name": "call_dcl_statements", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.call_dcl_statements", + "macro_sql": "{% macro call_dcl_statements(dcl_statement_list) %}\n {{ return(adapter.dispatch(\"call_dcl_statements\", \"dbt\")(dcl_statement_list)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__call_dcl_statements"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0009596, + "supported_languages": null + }, + "macro.dbt.default__call_dcl_statements": { + "name": "default__call_dcl_statements", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.default__call_dcl_statements", + "macro_sql": "{% macro default__call_dcl_statements(dcl_statement_list) %}\n {#\n -- By default, supply all grant + revoke statements in a single semicolon-separated block,\n -- so that they're all processed together.\n\n -- Some databases do not support this. 
Those adapters will need to override this macro\n -- to run each statement individually.\n #}\n {% call statement('grants') %}\n {% for dcl_statement in dcl_statement_list %}\n {{ dcl_statement }};\n {% endfor %}\n {% endcall %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0013223, + "supported_languages": null + }, + "macro.dbt.apply_grants": { + "name": "apply_grants", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.apply_grants", + "macro_sql": "{% macro apply_grants(relation, grant_config, should_revoke) %}\n {{ return(adapter.dispatch(\"apply_grants\", \"dbt\")(relation, grant_config, should_revoke)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__apply_grants"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.001676, + "supported_languages": null + }, + "macro.dbt.default__apply_grants": { + "name": "default__apply_grants", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/apply_grants.sql", + "original_file_path": "macros/adapters/apply_grants.sql", + "unique_id": "macro.dbt.default__apply_grants", + "macro_sql": "{% macro default__apply_grants(relation, grant_config, should_revoke=True) %}\n {#-- If grant_config is {} or None, this is a no-op --#}\n {% if grant_config %}\n {% if should_revoke %}\n {#-- We think previous grants may have carried over --#}\n {#-- Show current grants and calculate diffs --#}\n {% set current_grants_table = run_query(get_show_grant_sql(relation)) %}\n {% set current_grants_dict = adapter.standardize_grants_dict(current_grants_table) %}\n {% set needs_granting = diff_of_two_dicts(grant_config, current_grants_dict) %}\n {% set needs_revoking = diff_of_two_dicts(current_grants_dict, grant_config) %}\n {% if not (needs_granting or needs_revoking) %}\n {{ log('On ' ~ relation ~': All grants are in place, no revocation or granting needed.')}}\n {% endif %}\n {% else %}\n {#-- We don't think there's any chance of previous grants having carried over. --#}\n {#-- Jump straight to granting what the user has configured. 
--#}\n {% set needs_revoking = {} %}\n {% set needs_granting = grant_config %}\n {% endif %}\n {% if needs_granting or needs_revoking %}\n {% set revoke_statement_list = get_dcl_statement_list(relation, needs_revoking, get_revoke_sql) %}\n {% set grant_statement_list = get_dcl_statement_list(relation, needs_granting, get_grant_sql) %}\n {% set dcl_statement_list = revoke_statement_list + grant_statement_list %}\n {% if dcl_statement_list %}\n {{ call_dcl_statements(dcl_statement_list) }}\n {% endif %}\n {% endif %}\n {% endif %}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.run_query", + "macro.dbt.get_show_grant_sql", + "macro.dbt.get_dcl_statement_list", + "macro.dbt.call_dcl_statements" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0033882, + "supported_languages": null + }, + "macro.dbt.current_timestamp": { + "name": "current_timestamp", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/timestamps.sql", + "original_file_path": "macros/adapters/timestamps.sql", + "unique_id": "macro.dbt.current_timestamp", + "macro_sql": "{%- macro current_timestamp() -%}\n {{ adapter.dispatch('current_timestamp', 'dbt')() }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__current_timestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0040789, + "supported_languages": null + }, + "macro.dbt.default__current_timestamp": { + "name": "default__current_timestamp", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/timestamps.sql", + "original_file_path": "macros/adapters/timestamps.sql", + "unique_id": "macro.dbt.default__current_timestamp", + "macro_sql": "{% macro default__current_timestamp() -%}\n {{ exceptions.raise_not_implemented(\n 'current_timestamp macro not implemented for adapter ' + adapter.type()) }}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0043085, + "supported_languages": null + }, + "macro.dbt.snapshot_get_time": { + "name": "snapshot_get_time", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/timestamps.sql", + "original_file_path": "macros/adapters/timestamps.sql", + "unique_id": "macro.dbt.snapshot_get_time", + "macro_sql": "\n\n{%- macro snapshot_get_time() -%}\n {{ adapter.dispatch('snapshot_get_time', 'dbt')() }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__snapshot_get_time"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.004519, + "supported_languages": null + }, + "macro.dbt.default__snapshot_get_time": { + "name": "default__snapshot_get_time", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/timestamps.sql", + "original_file_path": "macros/adapters/timestamps.sql", + "unique_id": "macro.dbt.default__snapshot_get_time", + "macro_sql": "{% macro default__snapshot_get_time() %}\n {{ current_timestamp() }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.current_timestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + 
"node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0046766, + "supported_languages": null + }, + "macro.dbt.current_timestamp_backcompat": { + "name": "current_timestamp_backcompat", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/timestamps.sql", + "original_file_path": "macros/adapters/timestamps.sql", + "unique_id": "macro.dbt.current_timestamp_backcompat", + "macro_sql": "{% macro current_timestamp_backcompat() %}\n {{ return(adapter.dispatch('current_timestamp_backcompat', 'dbt')()) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__current_timestamp_backcompat"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0049481, + "supported_languages": null + }, + "macro.dbt.default__current_timestamp_backcompat": { + "name": "default__current_timestamp_backcompat", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/timestamps.sql", + "original_file_path": "macros/adapters/timestamps.sql", + "unique_id": "macro.dbt.default__current_timestamp_backcompat", + "macro_sql": "{% macro default__current_timestamp_backcompat() %}\n current_timestamp::timestamp\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0050728, + "supported_languages": null + }, + "macro.dbt.current_timestamp_in_utc_backcompat": { + "name": "current_timestamp_in_utc_backcompat", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/timestamps.sql", + "original_file_path": "macros/adapters/timestamps.sql", + "unique_id": "macro.dbt.current_timestamp_in_utc_backcompat", + "macro_sql": "{% macro current_timestamp_in_utc_backcompat() %}\n {{ return(adapter.dispatch('current_timestamp_in_utc_backcompat', 'dbt')()) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__current_timestamp_in_utc_backcompat"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0053542, + "supported_languages": null + }, + "macro.dbt.default__current_timestamp_in_utc_backcompat": { + "name": "default__current_timestamp_in_utc_backcompat", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/timestamps.sql", + "original_file_path": "macros/adapters/timestamps.sql", + "unique_id": "macro.dbt.default__current_timestamp_in_utc_backcompat", + "macro_sql": "{% macro default__current_timestamp_in_utc_backcompat() %}\n {{ return(adapter.dispatch('current_timestamp_backcompat', 'dbt')()) }}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.current_timestamp_backcompat", + "macro.dbt.default__current_timestamp_backcompat" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0056086, + "supported_languages": null + }, + "macro.dbt.validate_sql": { + "name": "validate_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/validate_sql.sql", + "original_file_path": "macros/adapters/validate_sql.sql", + "unique_id": "macro.dbt.validate_sql", + "macro_sql": "{% macro validate_sql(sql) -%}\n {{ return(adapter.dispatch('validate_sql', 'dbt')(sql)) 
}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__validate_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0060096, + "supported_languages": null + }, + "macro.dbt.default__validate_sql": { + "name": "default__validate_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/validate_sql.sql", + "original_file_path": "macros/adapters/validate_sql.sql", + "unique_id": "macro.dbt.default__validate_sql", + "macro_sql": "{% macro default__validate_sql(sql) -%}\n {% call statement('validate_sql') -%}\n explain {{ sql }}\n {% endcall %}\n {{ return(load_result('validate_sql')) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.006343, + "supported_languages": null + }, + "macro.dbt.get_create_index_sql": { + "name": "get_create_index_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/indexes.sql", + "original_file_path": "macros/adapters/indexes.sql", + "unique_id": "macro.dbt.get_create_index_sql", + "macro_sql": "{% macro get_create_index_sql(relation, index_dict) -%}\n {{ return(adapter.dispatch('get_create_index_sql', 'dbt')(relation, index_dict)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_create_index_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0073478, + "supported_languages": null + }, + "macro.dbt.default__get_create_index_sql": { + "name": "default__get_create_index_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/indexes.sql", + "original_file_path": "macros/adapters/indexes.sql", + "unique_id": "macro.dbt.default__get_create_index_sql", + "macro_sql": "{% macro default__get_create_index_sql(relation, index_dict) -%}\n {% do return(None) %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0075884, + "supported_languages": null + }, + "macro.dbt.create_indexes": { + "name": "create_indexes", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/indexes.sql", + "original_file_path": "macros/adapters/indexes.sql", + "unique_id": "macro.dbt.create_indexes", + "macro_sql": "{% macro create_indexes(relation) -%}\n {{ adapter.dispatch('create_indexes', 'dbt')(relation) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__create_indexes"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0078223, + "supported_languages": null + }, + "macro.dbt.default__create_indexes": { + "name": "default__create_indexes", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/indexes.sql", + "original_file_path": "macros/adapters/indexes.sql", + "unique_id": "macro.dbt.default__create_indexes", + "macro_sql": "{% macro default__create_indexes(relation) -%}\n {%- set _indexes = config.get('indexes', default=[]) -%}\n\n {% for _index_dict in _indexes %}\n {% set create_index_sql = 
get_create_index_sql(relation, _index_dict) %}\n {% if create_index_sql %}\n {% do run_query(create_index_sql) %}\n {% endif %}\n {% endfor %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_create_index_sql", "macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0084178, + "supported_languages": null + }, + "macro.dbt.get_drop_index_sql": { + "name": "get_drop_index_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/indexes.sql", + "original_file_path": "macros/adapters/indexes.sql", + "unique_id": "macro.dbt.get_drop_index_sql", + "macro_sql": "{% macro get_drop_index_sql(relation, index_name) -%}\n {{ adapter.dispatch('get_drop_index_sql', 'dbt')(relation, index_name) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_drop_index_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0087202, + "supported_languages": null + }, + "macro.dbt.default__get_drop_index_sql": { + "name": "default__get_drop_index_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/indexes.sql", + "original_file_path": "macros/adapters/indexes.sql", + "unique_id": "macro.dbt.default__get_drop_index_sql", + "macro_sql": "{% macro default__get_drop_index_sql(relation, index_name) -%}\n {{ exceptions.raise_compiler_error(\"`get_drop_index_sql has not been implemented for this adapter.\") }}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.008933, + "supported_languages": null + }, + "macro.dbt.get_show_indexes_sql": { + "name": "get_show_indexes_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/indexes.sql", + "original_file_path": "macros/adapters/indexes.sql", + "unique_id": "macro.dbt.get_show_indexes_sql", + "macro_sql": "{% macro get_show_indexes_sql(relation) -%}\n {{ adapter.dispatch('get_show_indexes_sql', 'dbt')(relation) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_show_indexes_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.009169, + "supported_languages": null + }, + "macro.dbt.default__get_show_indexes_sql": { + "name": "default__get_show_indexes_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/indexes.sql", + "original_file_path": "macros/adapters/indexes.sql", + "unique_id": "macro.dbt.default__get_show_indexes_sql", + "macro_sql": "{% macro default__get_show_indexes_sql(relation) -%}\n {{ exceptions.raise_compiler_error(\"`get_show_indexes_sql has not been implemented for this adapter.\") }}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0094485, + "supported_languages": null + }, + "macro.dbt.create_schema": { + "name": "create_schema", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/schema.sql", + "original_file_path": "macros/adapters/schema.sql", + "unique_id": 
"macro.dbt.create_schema", + "macro_sql": "{% macro create_schema(relation) -%}\n {{ adapter.dispatch('create_schema', 'dbt')(relation) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__create_schema"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.009969, + "supported_languages": null + }, + "macro.dbt.default__create_schema": { + "name": "default__create_schema", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/schema.sql", + "original_file_path": "macros/adapters/schema.sql", + "unique_id": "macro.dbt.default__create_schema", + "macro_sql": "{% macro default__create_schema(relation) -%}\n {%- call statement('create_schema') -%}\n create schema if not exists {{ relation.without_identifier() }}\n {% endcall %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0102503, + "supported_languages": null + }, + "macro.dbt.drop_schema": { + "name": "drop_schema", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/schema.sql", + "original_file_path": "macros/adapters/schema.sql", + "unique_id": "macro.dbt.drop_schema", + "macro_sql": "{% macro drop_schema(relation) -%}\n {{ adapter.dispatch('drop_schema', 'dbt')(relation) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__drop_schema"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0105247, + "supported_languages": null + }, + "macro.dbt.default__drop_schema": { + "name": "default__drop_schema", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/schema.sql", + "original_file_path": "macros/adapters/schema.sql", + "unique_id": "macro.dbt.default__drop_schema", + "macro_sql": "{% macro default__drop_schema(relation) -%}\n {%- call statement('drop_schema') -%}\n drop schema if exists {{ relation.without_identifier() }} cascade\n {% endcall %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.01081, + "supported_languages": null + }, + "macro.dbt.get_show_sql": { + "name": "get_show_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/show.sql", + "original_file_path": "macros/adapters/show.sql", + "unique_id": "macro.dbt.get_show_sql", + "macro_sql": "{% macro get_show_sql(compiled_code, sql_header, limit) -%}\n {%- if sql_header -%}\n {{ sql_header }}\n {%- endif -%}\n {%- if limit is not none -%}\n {{ get_limit_subquery_sql(compiled_code, limit) }}\n {%- else -%}\n {{ compiled_code }}\n {%- endif -%}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_limit_subquery_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0114985, + "supported_languages": null + }, + "macro.dbt.get_limit_subquery_sql": { + "name": "get_limit_subquery_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/show.sql", + "original_file_path": 
"macros/adapters/show.sql", + "unique_id": "macro.dbt.get_limit_subquery_sql", + "macro_sql": "{% macro get_limit_subquery_sql(sql, limit) %}\n {{ adapter.dispatch('get_limit_subquery_sql', 'dbt')(sql, limit) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_limit_subquery_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0117784, + "supported_languages": null + }, + "macro.dbt.default__get_limit_subquery_sql": { + "name": "default__get_limit_subquery_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/show.sql", + "original_file_path": "macros/adapters/show.sql", + "unique_id": "macro.dbt.default__get_limit_subquery_sql", + "macro_sql": "{% macro default__get_limit_subquery_sql(sql, limit) %}\n select *\n from (\n {{ sql }}\n ) as model_limit_subq\n limit {{ limit }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0119772, + "supported_languages": null + }, + "macro.dbt.get_catalog_relations": { + "name": "get_catalog_relations", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.get_catalog_relations", + "macro_sql": "{% macro get_catalog_relations(information_schema, relations) -%}\n {{ return(adapter.dispatch('get_catalog_relations', 'dbt')(information_schema, relations)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_catalog_relations"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.015076, + "supported_languages": null + }, + "macro.dbt.default__get_catalog_relations": { + "name": "default__get_catalog_relations", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.default__get_catalog_relations", + "macro_sql": "{% macro default__get_catalog_relations(information_schema, relations) -%}\n {% set typename = adapter.type() %}\n {% set msg -%}\n get_catalog_relations not implemented for {{ typename }}\n {%- endset %}\n\n {{ exceptions.raise_compiler_error(msg) }}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0154586, + "supported_languages": null + }, + "macro.dbt.get_catalog": { + "name": "get_catalog", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.get_catalog", + "macro_sql": "{% macro get_catalog(information_schema, schemas) -%}\n {{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__get_catalog"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.015793, + "supported_languages": null + }, + "macro.dbt.default__get_catalog": { + "name": 
"default__get_catalog", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.default__get_catalog", + "macro_sql": "{% macro default__get_catalog(information_schema, schemas) -%}\n\n {% set typename = adapter.type() %}\n {% set msg -%}\n get_catalog not implemented for {{ typename }}\n {%- endset %}\n\n {{ exceptions.raise_compiler_error(msg) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0161824, + "supported_languages": null + }, + "macro.dbt.information_schema_name": { + "name": "information_schema_name", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.information_schema_name", + "macro_sql": "{% macro information_schema_name(database) %}\n {{ return(adapter.dispatch('information_schema_name', 'dbt')(database)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__information_schema_name"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.016455, + "supported_languages": null + }, + "macro.dbt.default__information_schema_name": { + "name": "default__information_schema_name", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.default__information_schema_name", + "macro_sql": "{% macro default__information_schema_name(database) -%}\n {%- if database -%}\n {{ database }}.INFORMATION_SCHEMA\n {%- else -%}\n INFORMATION_SCHEMA\n {%- endif -%}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.016686, + "supported_languages": null + }, + "macro.dbt.list_schemas": { + "name": "list_schemas", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.list_schemas", + "macro_sql": "{% macro list_schemas(database) -%}\n {{ return(adapter.dispatch('list_schemas', 'dbt')(database)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__list_schemas"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0169492, + "supported_languages": null + }, + "macro.dbt.default__list_schemas": { + "name": "default__list_schemas", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.default__list_schemas", + "macro_sql": "{% macro default__list_schemas(database) -%}\n {% set sql %}\n select distinct schema_name\n from {{ information_schema_name(database) }}.SCHEMATA\n where catalog_name ilike '{{ database }}'\n {% endset %}\n {{ return(run_query(sql)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.information_schema_name", "macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + 
"show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0173454, + "supported_languages": null + }, + "macro.dbt.check_schema_exists": { + "name": "check_schema_exists", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.check_schema_exists", + "macro_sql": "{% macro check_schema_exists(information_schema, schema) -%}\n {{ return(adapter.dispatch('check_schema_exists', 'dbt')(information_schema, schema)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__check_schema_exists"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0176973, + "supported_languages": null + }, + "macro.dbt.default__check_schema_exists": { + "name": "default__check_schema_exists", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.default__check_schema_exists", + "macro_sql": "{% macro default__check_schema_exists(information_schema, schema) -%}\n {% set sql -%}\n select count(*)\n from {{ information_schema.replace(information_schema_view='SCHEMATA') }}\n where catalog_name='{{ information_schema.database }}'\n and schema_name='{{ schema }}'\n {%- endset %}\n {{ return(run_query(sql)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.replace", "macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.018226, + "supported_languages": null + }, + "macro.dbt.list_relations_without_caching": { + "name": "list_relations_without_caching", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.list_relations_without_caching", + "macro_sql": "{% macro list_relations_without_caching(schema_relation) %}\n {{ return(adapter.dispatch('list_relations_without_caching', 'dbt')(schema_relation)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__list_relations_without_caching"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0184965, + "supported_languages": null + }, + "macro.dbt.default__list_relations_without_caching": { + "name": "default__list_relations_without_caching", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.default__list_relations_without_caching", + "macro_sql": "{% macro default__list_relations_without_caching(schema_relation) %}\n {{ exceptions.raise_not_implemented(\n 'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.01874, + "supported_languages": null + }, + "macro.dbt.get_relations": { + "name": "get_relations", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + 
"original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.get_relations", + "macro_sql": "{% macro get_relations() %}\n {{ return(adapter.dispatch('get_relations', 'dbt')()) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_relations"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0190268, + "supported_languages": null + }, + "macro.dbt.default__get_relations": { + "name": "default__get_relations", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.default__get_relations", + "macro_sql": "{% macro default__get_relations() %}\n {{ exceptions.raise_not_implemented(\n 'get_relations macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0192914, + "supported_languages": null + }, + "macro.dbt.get_relation_last_modified": { + "name": "get_relation_last_modified", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.get_relation_last_modified", + "macro_sql": "{% macro get_relation_last_modified(information_schema, relations) %}\n {{ return(adapter.dispatch('get_relation_last_modified', 'dbt')(information_schema, relations)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_relation_last_modified"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0196042, + "supported_languages": null + }, + "macro.dbt.default__get_relation_last_modified": { + "name": "default__get_relation_last_modified", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/adapters/metadata.sql", + "original_file_path": "macros/adapters/metadata.sql", + "unique_id": "macro.dbt.default__get_relation_last_modified", + "macro_sql": "{% macro default__get_relation_last_modified(information_schema, relations) %}\n {{ exceptions.raise_not_implemented(\n 'get_relation_last_modified macro not implemented for adapter ' + adapter.type()) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.019863, + "supported_languages": null + }, + "macro.dbt.generate_schema_name": { + "name": "generate_schema_name", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/get_custom_name/get_custom_schema.sql", + "original_file_path": "macros/get_custom_name/get_custom_schema.sql", + "unique_id": "macro.dbt.generate_schema_name", + "macro_sql": "{% macro generate_schema_name(custom_schema_name=none, node=none) -%}\n {{ return(adapter.dispatch('generate_schema_name', 'dbt')(custom_schema_name, node)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__generate_schema_name"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0205266, + "supported_languages": null + }, + 
"macro.dbt.default__generate_schema_name": { + "name": "default__generate_schema_name", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/get_custom_name/get_custom_schema.sql", + "original_file_path": "macros/get_custom_name/get_custom_schema.sql", + "unique_id": "macro.dbt.default__generate_schema_name", + "macro_sql": "{% macro default__generate_schema_name(custom_schema_name, node) -%}\n\n {%- set default_schema = target.schema -%}\n {%- if custom_schema_name is none -%}\n\n {{ default_schema }}\n\n {%- else -%}\n\n {{ default_schema }}_{{ custom_schema_name | trim }}\n\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.020968, + "supported_languages": null + }, + "macro.dbt.generate_schema_name_for_env": { + "name": "generate_schema_name_for_env", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/get_custom_name/get_custom_schema.sql", + "original_file_path": "macros/get_custom_name/get_custom_schema.sql", + "unique_id": "macro.dbt.generate_schema_name_for_env", + "macro_sql": "{% macro generate_schema_name_for_env(custom_schema_name, node) -%}\n\n {%- set default_schema = target.schema -%}\n {%- if target.name == 'prod' and custom_schema_name is not none -%}\n\n {{ custom_schema_name | trim }}\n\n {%- else -%}\n\n {{ default_schema }}\n\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0214088, + "supported_languages": null + }, + "macro.dbt.generate_database_name": { + "name": "generate_database_name", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/get_custom_name/get_custom_database.sql", + "original_file_path": "macros/get_custom_name/get_custom_database.sql", + "unique_id": "macro.dbt.generate_database_name", + "macro_sql": "{% macro generate_database_name(custom_database_name=none, node=none) -%}\n {% do return(adapter.dispatch('generate_database_name', 'dbt')(custom_database_name, node)) %}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__generate_database_name"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0219374, + "supported_languages": null + }, + "macro.dbt.default__generate_database_name": { + "name": "default__generate_database_name", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/get_custom_name/get_custom_database.sql", + "original_file_path": "macros/get_custom_name/get_custom_database.sql", + "unique_id": "macro.dbt.default__generate_database_name", + "macro_sql": "{% macro default__generate_database_name(custom_database_name=none, node=none) -%}\n {%- set default_database = target.database -%}\n {%- if custom_database_name is none -%}\n\n {{ default_database }}\n\n {%- else -%}\n\n {{ custom_database_name }}\n\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0223157, + "supported_languages": null + }, + "macro.dbt.generate_alias_name": { + "name": "generate_alias_name", + "resource_type": "macro", + 
"package_name": "dbt", + "path": "macros/get_custom_name/get_custom_alias.sql", + "original_file_path": "macros/get_custom_name/get_custom_alias.sql", + "unique_id": "macro.dbt.generate_alias_name", + "macro_sql": "{% macro generate_alias_name(custom_alias_name=none, node=none) -%}\n {% do return(adapter.dispatch('generate_alias_name', 'dbt')(custom_alias_name, node)) %}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__generate_alias_name"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0228853, + "supported_languages": null + }, + "macro.dbt.default__generate_alias_name": { + "name": "default__generate_alias_name", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/get_custom_name/get_custom_alias.sql", + "original_file_path": "macros/get_custom_name/get_custom_alias.sql", + "unique_id": "macro.dbt.default__generate_alias_name", + "macro_sql": "{% macro default__generate_alias_name(custom_alias_name=none, node=none) -%}\n\n {%- if custom_alias_name -%}\n\n {{ custom_alias_name | trim }}\n\n {%- elif node.version -%}\n\n {{ return(node.name ~ \"_v\" ~ (node.version | replace(\".\", \"_\"))) }}\n\n {%- else -%}\n\n {{ node.name }}\n\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.023471, + "supported_languages": null + }, + "macro.dbt.convert_datetime": { + "name": "convert_datetime", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/etc/datetime.sql", + "original_file_path": "macros/etc/datetime.sql", + "unique_id": "macro.dbt.convert_datetime", + "macro_sql": "{% macro convert_datetime(date_str, date_fmt) %}\n\n {% set error_msg -%}\n The provided partition date '{{ date_str }}' does not match the expected format '{{ date_fmt }}'\n {%- endset %}\n\n {% set res = try_or_compiler_error(error_msg, modules.datetime.datetime.strptime, date_str.strip(), date_fmt) %}\n {{ return(res) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0254562, + "supported_languages": null + }, + "macro.dbt.dates_in_range": { + "name": "dates_in_range", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/etc/datetime.sql", + "original_file_path": "macros/etc/datetime.sql", + "unique_id": "macro.dbt.dates_in_range", + "macro_sql": "{% macro dates_in_range(start_date_str, end_date_str=none, in_fmt=\"%Y%m%d\", out_fmt=\"%Y%m%d\") %}\n {% set end_date_str = start_date_str if end_date_str is none else end_date_str %}\n\n {% set start_date = convert_datetime(start_date_str, in_fmt) %}\n {% set end_date = convert_datetime(end_date_str, in_fmt) %}\n\n {% set day_count = (end_date - start_date).days %}\n {% if day_count < 0 %}\n {% set msg -%}\n Partition start date is after the end date ({{ start_date }}, {{ end_date }})\n {%- endset %}\n\n {{ exceptions.raise_compiler_error(msg, model) }}\n {% endif %}\n\n {% set date_list = [] %}\n {% for i in range(0, day_count + 1) %}\n {% set the_date = (modules.datetime.timedelta(days=i) + start_date) %}\n {% if not out_fmt %}\n {% set _ = date_list.append(the_date) %}\n {% else %}\n {% set _ = date_list.append(the_date.strftime(out_fmt)) %}\n 
{% endif %}\n {% endfor %}\n\n {{ return(date_list) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.convert_datetime"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0273585, + "supported_languages": null + }, + "macro.dbt.partition_range": { + "name": "partition_range", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/etc/datetime.sql", + "original_file_path": "macros/etc/datetime.sql", + "unique_id": "macro.dbt.partition_range", + "macro_sql": "{% macro partition_range(raw_partition_date, date_fmt='%Y%m%d') %}\n {% set partition_range = (raw_partition_date | string).split(\",\") %}\n\n {% if (partition_range | length) == 1 %}\n {% set start_date = partition_range[0] %}\n {% set end_date = none %}\n {% elif (partition_range | length) == 2 %}\n {% set start_date = partition_range[0] %}\n {% set end_date = partition_range[1] %}\n {% else %}\n {{ exceptions.raise_compiler_error(\"Invalid partition time. Expected format: {Start Date}[,{End Date}]. Got: \" ~ raw_partition_date) }}\n {% endif %}\n\n {{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.dates_in_range"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0286102, + "supported_languages": null + }, + "macro.dbt.py_current_timestring": { + "name": "py_current_timestring", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/etc/datetime.sql", + "original_file_path": "macros/etc/datetime.sql", + "unique_id": "macro.dbt.py_current_timestring", + "macro_sql": "{% macro py_current_timestring() %}\n {% set dt = modules.datetime.datetime.now() %}\n {% do return(dt.strftime(\"%Y%m%d%H%M%S%f\")) %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0289621, + "supported_languages": null + }, + "macro.dbt.statement": { + "name": "statement", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/etc/statement.sql", + "original_file_path": "macros/etc/statement.sql", + "unique_id": "macro.dbt.statement", + "macro_sql": "\n{%- macro statement(name=None, fetch_result=False, auto_begin=True, language='sql') -%}\n {%- if execute: -%}\n {%- set compiled_code = caller() -%}\n\n {%- if name == 'main' -%}\n {{ log('Writing runtime {} for node \"{}\"'.format(language, model['unique_id'])) }}\n {{ write(compiled_code) }}\n {%- endif -%}\n {%- if language == 'sql'-%}\n {%- set res, table = adapter.execute(compiled_code, auto_begin=auto_begin, fetch=fetch_result) -%}\n {%- elif language == 'python' -%}\n {%- set res = submit_python_job(model, compiled_code) -%}\n {#-- TODO: What should table be for python models? 
--#}\n {%- set table = None -%}\n {%- else -%}\n {% do exceptions.raise_compiler_error(\"statement macro didn't get supported language\") %}\n {%- endif -%}\n\n {%- if name is not none -%}\n {{ store_result(name, response=res, agate_table=table) }}\n {%- endif -%}\n\n {%- endif -%}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.031051, + "supported_languages": null + }, + "macro.dbt.noop_statement": { + "name": "noop_statement", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/etc/statement.sql", + "original_file_path": "macros/etc/statement.sql", + "unique_id": "macro.dbt.noop_statement", + "macro_sql": "{% macro noop_statement(name=None, message=None, code=None, rows_affected=None, res=None) -%}\n {%- set sql = caller() -%}\n\n {%- if name == 'main' -%}\n {{ log('Writing runtime SQL for node \"{}\"'.format(model['unique_id'])) }}\n {{ write(sql) }}\n {%- endif -%}\n\n {%- if name is not none -%}\n {{ store_raw_result(name, message=message, code=code, rows_affected=rows_affected, agate_table=res) }}\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0319781, + "supported_languages": null + }, + "macro.dbt.run_query": { + "name": "run_query", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/etc/statement.sql", + "original_file_path": "macros/etc/statement.sql", + "unique_id": "macro.dbt.run_query", + "macro_sql": "{% macro run_query(sql) %}\n {% call statement(\"run_query_statement\", fetch_result=true, auto_begin=false) %}\n {{ sql }}\n {% endcall %}\n\n {% do return(load_result(\"run_query_statement\").table) %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0324264, + "supported_languages": null + }, + "macro.dbt.default__test_unique": { + "name": "default__test_unique", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/generic_test_sql/unique.sql", + "original_file_path": "macros/generic_test_sql/unique.sql", + "unique_id": "macro.dbt.default__test_unique", + "macro_sql": "{% macro default__test_unique(model, column_name) %}\n\nselect\n {{ column_name }} as unique_field,\n count(*) as n_records\n\nfrom {{ model }}\nwhere {{ column_name }} is not null\ngroup by {{ column_name }}\nhaving count(*) > 1\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0327823, + "supported_languages": null + }, + "macro.dbt.default__test_not_null": { + "name": "default__test_not_null", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/generic_test_sql/not_null.sql", + "original_file_path": "macros/generic_test_sql/not_null.sql", + "unique_id": "macro.dbt.default__test_not_null", + "macro_sql": "{% macro default__test_not_null(model, column_name) %}\n\n{% set column_list = '*' if should_store_failures() else column_name %}\n\nselect {{ column_list }}\nfrom {{ model }}\nwhere {{ column_name }} is null\n\n{% endmacro %}", + "depends_on": 
{ + "macros": ["macro.dbt.should_store_failures"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0332408, + "supported_languages": null + }, + "macro.dbt.default__test_relationships": { + "name": "default__test_relationships", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/generic_test_sql/relationships.sql", + "original_file_path": "macros/generic_test_sql/relationships.sql", + "unique_id": "macro.dbt.default__test_relationships", + "macro_sql": "{% macro default__test_relationships(model, column_name, to, field) %}\n\nwith child as (\n select {{ column_name }} as from_field\n from {{ model }}\n where {{ column_name }} is not null\n),\n\nparent as (\n select {{ field }} as to_field\n from {{ to }}\n)\n\nselect\n from_field\n\nfrom child\nleft join parent\n on child.from_field = parent.to_field\n\nwhere parent.to_field is null\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0337026, + "supported_languages": null + }, + "macro.dbt.default__test_accepted_values": { + "name": "default__test_accepted_values", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/generic_test_sql/accepted_values.sql", + "original_file_path": "macros/generic_test_sql/accepted_values.sql", + "unique_id": "macro.dbt.default__test_accepted_values", + "macro_sql": "{% macro default__test_accepted_values(model, column_name, values, quote=True) %}\n\nwith all_values as (\n\n select\n {{ column_name }} as value_field,\n count(*) as n_records\n\n from {{ model }}\n group by {{ column_name }}\n\n)\n\nselect *\nfrom all_values\nwhere value_field not in (\n {% for value in values -%}\n {% if quote -%}\n '{{ value }}'\n {%- else -%}\n {{ value }}\n {%- endif -%}\n {%- if not loop.last -%},{%- endif %}\n {%- endfor %}\n)\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0344737, + "supported_languages": null + }, + "macro.dbt.set_sql_header": { + "name": "set_sql_header", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/configs.sql", + "original_file_path": "macros/materializations/configs.sql", + "unique_id": "macro.dbt.set_sql_header", + "macro_sql": "{% macro set_sql_header(config) -%}\n {{ config.set('sql_header', caller()) }}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0349834, + "supported_languages": null + }, + "macro.dbt.should_full_refresh": { + "name": "should_full_refresh", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/configs.sql", + "original_file_path": "macros/materializations/configs.sql", + "unique_id": "macro.dbt.should_full_refresh", + "macro_sql": "{% macro should_full_refresh() %}\n {% set config_full_refresh = config.get('full_refresh') %}\n {% if config_full_refresh is none %}\n {% set config_full_refresh = flags.FULL_REFRESH %}\n {% endif %}\n {% do return(config_full_refresh) %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + 
"docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0354319, + "supported_languages": null + }, + "macro.dbt.should_store_failures": { + "name": "should_store_failures", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/configs.sql", + "original_file_path": "macros/materializations/configs.sql", + "unique_id": "macro.dbt.should_store_failures", + "macro_sql": "{% macro should_store_failures() %}\n {% set config_store_failures = config.get('store_failures') %}\n {% if config_store_failures is none %}\n {% set config_store_failures = flags.STORE_FAILURES %}\n {% endif %}\n {% do return(config_store_failures) %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0358763, + "supported_languages": null + }, + "macro.dbt.run_hooks": { + "name": "run_hooks", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/hooks.sql", + "original_file_path": "macros/materializations/hooks.sql", + "unique_id": "macro.dbt.run_hooks", + "macro_sql": "{% macro run_hooks(hooks, inside_transaction=True) %}\n {% for hook in hooks | selectattr('transaction', 'equalto', inside_transaction) %}\n {% if not inside_transaction and loop.first %}\n {% call statement(auto_begin=inside_transaction) %}\n commit;\n {% endcall %}\n {% endif %}\n {% set rendered = render(hook.get('sql')) | trim %}\n {% if (rendered | length) > 0 %}\n {% call statement(auto_begin=inside_transaction) %}\n {{ rendered }}\n {% endcall %}\n {% endif %}\n {% endfor %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.037561, + "supported_languages": null + }, + "macro.dbt.make_hook_config": { + "name": "make_hook_config", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/hooks.sql", + "original_file_path": "macros/materializations/hooks.sql", + "unique_id": "macro.dbt.make_hook_config", + "macro_sql": "{% macro make_hook_config(sql, inside_transaction) %}\n {{ tojson({\"sql\": sql, \"transaction\": inside_transaction}) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0379453, + "supported_languages": null + }, + "macro.dbt.before_begin": { + "name": "before_begin", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/hooks.sql", + "original_file_path": "macros/materializations/hooks.sql", + "unique_id": "macro.dbt.before_begin", + "macro_sql": "{% macro before_begin(sql) %}\n {{ make_hook_config(sql, inside_transaction=False) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.make_hook_config"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.03818, + "supported_languages": null + }, + "macro.dbt.in_transaction": { + "name": "in_transaction", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/hooks.sql", + "original_file_path": "macros/materializations/hooks.sql", + "unique_id": 
"macro.dbt.in_transaction", + "macro_sql": "{% macro in_transaction(sql) %}\n {{ make_hook_config(sql, inside_transaction=True) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.make_hook_config"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.038476, + "supported_languages": null + }, + "macro.dbt.after_commit": { + "name": "after_commit", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/hooks.sql", + "original_file_path": "macros/materializations/hooks.sql", + "unique_id": "macro.dbt.after_commit", + "macro_sql": "{% macro after_commit(sql) %}\n {{ make_hook_config(sql, inside_transaction=False) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.make_hook_config"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0386918, + "supported_languages": null + }, + "macro.dbt.get_where_subquery": { + "name": "get_where_subquery", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/tests/where_subquery.sql", + "original_file_path": "macros/materializations/tests/where_subquery.sql", + "unique_id": "macro.dbt.get_where_subquery", + "macro_sql": "{% macro get_where_subquery(relation) -%}\n {% do return(adapter.dispatch('get_where_subquery', 'dbt')(relation)) %}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_where_subquery"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.039237, + "supported_languages": null + }, + "macro.dbt.default__get_where_subquery": { + "name": "default__get_where_subquery", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/tests/where_subquery.sql", + "original_file_path": "macros/materializations/tests/where_subquery.sql", + "unique_id": "macro.dbt.default__get_where_subquery", + "macro_sql": "{% macro default__get_where_subquery(relation) -%}\n {% set where = config.get('where', '') %}\n {% if where %}\n {%- set filtered -%}\n (select * from {{ relation }} where {{ where }}) dbt_subquery\n {%- endset -%}\n {% do return(filtered) %}\n {%- else -%}\n {% do return(relation) %}\n {%- endif -%}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0397973, + "supported_languages": null + }, + "macro.dbt.get_test_sql": { + "name": "get_test_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/tests/helpers.sql", + "original_file_path": "macros/materializations/tests/helpers.sql", + "unique_id": "macro.dbt.get_test_sql", + "macro_sql": "{% macro get_test_sql(main_sql, fail_calc, warn_if, error_if, limit) -%}\n {{ adapter.dispatch('get_test_sql', 'dbt')(main_sql, fail_calc, warn_if, error_if, limit) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_test_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0404356, + "supported_languages": null + }, + "macro.dbt.default__get_test_sql": { + "name": "default__get_test_sql", + "resource_type": 
"macro", + "package_name": "dbt", + "path": "macros/materializations/tests/helpers.sql", + "original_file_path": "macros/materializations/tests/helpers.sql", + "unique_id": "macro.dbt.default__get_test_sql", + "macro_sql": "{% macro default__get_test_sql(main_sql, fail_calc, warn_if, error_if, limit) -%}\n select\n {{ fail_calc }} as failures,\n {{ fail_calc }} {{ warn_if }} as should_warn,\n {{ fail_calc }} {{ error_if }} as should_error\n from (\n {{ main_sql }}\n {{ \"limit \" ~ limit if limit != none }}\n ) dbt_internal_test\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.04103, + "supported_languages": null + }, + "macro.dbt.materialization_test_default": { + "name": "materialization_test_default", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/tests/test.sql", + "original_file_path": "macros/materializations/tests/test.sql", + "unique_id": "macro.dbt.materialization_test_default", + "macro_sql": "{%- materialization test, default -%}\n\n {% set relations = [] %}\n\n {% if should_store_failures() %}\n\n {% set identifier = model['alias'] %}\n {% set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}\n\n {% set store_failures_as = config.get('store_failures_as') %}\n -- if `--store-failures` is invoked via command line and `store_failures_as` is not set,\n -- config.get('store_failures_as', 'table') returns None, not 'table'\n {% if store_failures_as == none %}{% set store_failures_as = 'table' %}{% endif %}\n {% if store_failures_as not in ['table', 'view'] %}\n {{ exceptions.raise_compiler_error(\n \"'\" ~ store_failures_as ~ \"' is not a valid value for `store_failures_as`. 
\"\n \"Accepted values are: ['ephemeral', 'table', 'view']\"\n ) }}\n {% endif %}\n\n {% set target_relation = api.Relation.create(\n identifier=identifier, schema=schema, database=database, type=store_failures_as) -%} %}\n\n {% if old_relation %}\n {% do adapter.drop_relation(old_relation) %}\n {% endif %}\n\n {% call statement(auto_begin=True) %}\n {{ get_create_sql(target_relation, sql) }}\n {% endcall %}\n\n {% do relations.append(target_relation) %}\n\n {% set main_sql %}\n select *\n from {{ target_relation }}\n {% endset %}\n\n {{ adapter.commit() }}\n\n {% else %}\n\n {% set main_sql = sql %}\n\n {% endif %}\n\n {% set limit = config.get('limit') %}\n {% set fail_calc = config.get('fail_calc') %}\n {% set warn_if = config.get('warn_if') %}\n {% set error_if = config.get('error_if') %}\n\n {% call statement('main', fetch_result=True) -%}\n\n {{ get_test_sql(main_sql, fail_calc, warn_if, error_if, limit)}}\n\n {%- endcall %}\n\n {{ return({'relations': relations}) }}\n\n{%- endmaterialization -%}", + "depends_on": { + "macros": [ + "macro.dbt.should_store_failures", + "macro.dbt.statement", + "macro.dbt.get_create_sql", + "macro.dbt.get_test_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0447197, + "supported_languages": ["sql"] + }, + "macro.dbt.create_csv_table": { + "name": "create_csv_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.create_csv_table", + "macro_sql": "{% macro create_csv_table(model, agate_table) -%}\n {{ adapter.dispatch('create_csv_table', 'dbt')(model, agate_table) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__create_csv_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0496247, + "supported_languages": null + }, + "macro.dbt.default__create_csv_table": { + "name": "default__create_csv_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.default__create_csv_table", + "macro_sql": "{% macro default__create_csv_table(model, agate_table) %}\n {%- set column_override = model['config'].get('column_types', {}) -%}\n {%- set quote_seed_column = model['config'].get('quote_columns', None) -%}\n\n {% set sql %}\n create table {{ this.render() }} (\n {%- for col_name in agate_table.column_names -%}\n {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%}\n {%- set type = column_override.get(col_name, inferred_type) -%}\n {%- set column_name = (col_name | string) -%}\n {{ adapter.quote_seed_column(column_name, quote_seed_column) }} {{ type }} {%- if not loop.last -%}, {%- endif -%}\n {%- endfor -%}\n )\n {% endset %}\n\n {% call statement('_') -%}\n {{ sql }}\n {%- endcall %}\n\n {{ return(sql) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.05101, + "supported_languages": null + }, + "macro.dbt.reset_csv_table": { + "name": "reset_csv_table", + "resource_type": "macro", + "package_name": 
"dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.reset_csv_table", + "macro_sql": "{% macro reset_csv_table(model, full_refresh, old_relation, agate_table) -%}\n {{ adapter.dispatch('reset_csv_table', 'dbt')(model, full_refresh, old_relation, agate_table) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__reset_csv_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0513537, + "supported_languages": null + }, + "macro.dbt.default__reset_csv_table": { + "name": "default__reset_csv_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.default__reset_csv_table", + "macro_sql": "{% macro default__reset_csv_table(model, full_refresh, old_relation, agate_table) %}\n {% set sql = \"\" %}\n {% if full_refresh %}\n {{ adapter.drop_relation(old_relation) }}\n {% set sql = create_csv_table(model, agate_table) %}\n {% else %}\n {{ adapter.truncate_relation(old_relation) }}\n {% set sql = \"truncate table \" ~ old_relation %}\n {% endif %}\n\n {{ return(sql) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.create_csv_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0520751, + "supported_languages": null + }, + "macro.dbt.get_csv_sql": { + "name": "get_csv_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.get_csv_sql", + "macro_sql": "{% macro get_csv_sql(create_or_truncate_sql, insert_sql) %}\n {{ adapter.dispatch('get_csv_sql', 'dbt')(create_or_truncate_sql, insert_sql) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_csv_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0523882, + "supported_languages": null + }, + "macro.dbt.default__get_csv_sql": { + "name": "default__get_csv_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.default__get_csv_sql", + "macro_sql": "{% macro default__get_csv_sql(create_or_truncate_sql, insert_sql) %}\n {{ create_or_truncate_sql }};\n -- dbt seed --\n {{ insert_sql }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0525935, + "supported_languages": null + }, + "macro.dbt.get_binding_char": { + "name": "get_binding_char", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.get_binding_char", + "macro_sql": "{% macro get_binding_char() -%}\n {{ adapter.dispatch('get_binding_char', 'dbt')() }}\n{%- endmacro %}", + "depends_on": { + "macros": 
["macro.dbt_duckdb.duckdb__get_binding_char"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0528047, + "supported_languages": null + }, + "macro.dbt.default__get_binding_char": { + "name": "default__get_binding_char", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.default__get_binding_char", + "macro_sql": "{% macro default__get_binding_char() %}\n {{ return('%s') }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0529766, + "supported_languages": null + }, + "macro.dbt.get_batch_size": { + "name": "get_batch_size", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.get_batch_size", + "macro_sql": "{% macro get_batch_size() -%}\n {{ return(adapter.dispatch('get_batch_size', 'dbt')()) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__get_batch_size"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0532641, + "supported_languages": null + }, + "macro.dbt.default__get_batch_size": { + "name": "default__get_batch_size", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.default__get_batch_size", + "macro_sql": "{% macro default__get_batch_size() %}\n {{ return(10000) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0534682, + "supported_languages": null + }, + "macro.dbt.get_seed_column_quoted_csv": { + "name": "get_seed_column_quoted_csv", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.get_seed_column_quoted_csv", + "macro_sql": "{% macro get_seed_column_quoted_csv(model, column_names) %}\n {%- set quote_seed_column = model['config'].get('quote_columns', None) -%}\n {% set quoted = [] %}\n {% for col in column_names -%}\n {%- do quoted.append(adapter.quote_seed_column(col, quote_seed_column)) -%}\n {%- endfor %}\n\n {%- set dest_cols_csv = quoted | join(', ') -%}\n {{ return(dest_cols_csv) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.054262, + "supported_languages": null + }, + "macro.dbt.load_csv_rows": { + "name": "load_csv_rows", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.load_csv_rows", + "macro_sql": "{% macro load_csv_rows(model, agate_table) -%}\n {{ 
adapter.dispatch('load_csv_rows', 'dbt')(model, agate_table) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__load_csv_rows"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.054549, + "supported_languages": null + }, + "macro.dbt.default__load_csv_rows": { + "name": "default__load_csv_rows", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/helpers.sql", + "original_file_path": "macros/materializations/seeds/helpers.sql", + "unique_id": "macro.dbt.default__load_csv_rows", + "macro_sql": "{% macro default__load_csv_rows(model, agate_table) %}\n\n {% set batch_size = get_batch_size() %}\n\n {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %}\n {% set bindings = [] %}\n\n {% set statements = [] %}\n\n {% for chunk in agate_table.rows | batch(batch_size) %}\n {% set bindings = [] %}\n\n {% for row in chunk %}\n {% do bindings.extend(row) %}\n {% endfor %}\n\n {% set sql %}\n insert into {{ this.render() }} ({{ cols_sql }}) values\n {% for row in chunk -%}\n ({%- for column in agate_table.column_names -%}\n {{ get_binding_char() }}\n {%- if not loop.last%},{%- endif %}\n {%- endfor -%})\n {%- if not loop.last%},{%- endif %}\n {%- endfor %}\n {% endset %}\n\n {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %}\n\n {% if loop.index0 == 0 %}\n {% do statements.append(sql) %}\n {% endif %}\n {% endfor %}\n\n {# Return SQL so we can render it out into the compiled files #}\n {{ return(statements[0]) }}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.get_batch_size", + "macro.dbt.get_seed_column_quoted_csv", + "macro.dbt.get_binding_char" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0565498, + "supported_languages": null + }, + "macro.dbt.materialization_seed_default": { + "name": "materialization_seed_default", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/seeds/seed.sql", + "original_file_path": "macros/materializations/seeds/seed.sql", + "unique_id": "macro.dbt.materialization_seed_default", + "macro_sql": "{% materialization seed, default %}\n\n {%- set identifier = model['alias'] -%}\n {%- set full_refresh_mode = (should_full_refresh()) -%}\n\n {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}\n\n {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}\n {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}\n\n {%- set grant_config = config.get('grants') -%}\n {%- set agate_table = load_agate_table() -%}\n -- grab current tables grants config for comparison later on\n\n {%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% set create_table_sql = \"\" %}\n {% if exists_as_view %}\n {{ exceptions.raise_compiler_error(\"Cannot seed to '{}', it is a view\".format(old_relation)) }}\n {% elif exists_as_table %}\n {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %}\n {% else %}\n {% set create_table_sql = create_csv_table(model, agate_table) %}\n {% endif 
%}\n\n {% set code = 'CREATE' if full_refresh_mode else 'INSERT' %}\n {% set rows_affected = (agate_table.rows | length) %}\n {% set sql = load_csv_rows(model, agate_table) %}\n\n {% call noop_statement('main', code ~ ' ' ~ rows_affected, code, rows_affected) %}\n {{ get_csv_sql(create_table_sql, sql) }};\n {% endcall %}\n\n {% set target_relation = this.incorporate(type='table') %}\n\n {% set should_revoke = should_revoke(old_relation, full_refresh_mode) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {% if full_refresh_mode or not exists_as_table %}\n {% do create_indexes(target_relation) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n -- `COMMIT` happens here\n {{ adapter.commit() }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmaterialization %}", + "depends_on": { + "macros": [ + "macro.dbt.should_full_refresh", + "macro.dbt.run_hooks", + "macro.dbt.reset_csv_table", + "macro.dbt.create_csv_table", + "macro.dbt.load_csv_rows", + "macro.dbt.noop_statement", + "macro.dbt.get_csv_sql", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants", + "macro.dbt.persist_docs", + "macro.dbt.create_indexes" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0612874, + "supported_languages": ["sql"] + }, + "macro.dbt.snapshot_merge_sql": { + "name": "snapshot_merge_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/snapshot_merge.sql", + "original_file_path": "macros/materializations/snapshots/snapshot_merge.sql", + "unique_id": "macro.dbt.snapshot_merge_sql", + "macro_sql": "{% macro snapshot_merge_sql(target, source, insert_cols) -%}\n {{ adapter.dispatch('snapshot_merge_sql', 'dbt')(target, source, insert_cols) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__snapshot_merge_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0619018, + "supported_languages": null + }, + "macro.dbt.default__snapshot_merge_sql": { + "name": "default__snapshot_merge_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/snapshot_merge.sql", + "original_file_path": "macros/materializations/snapshots/snapshot_merge.sql", + "unique_id": "macro.dbt.default__snapshot_merge_sql", + "macro_sql": "{% macro default__snapshot_merge_sql(target, source, insert_cols) -%}\n {%- set insert_cols_csv = insert_cols | join(', ') -%}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n using {{ source }} as DBT_INTERNAL_SOURCE\n on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id\n\n when matched\n and DBT_INTERNAL_DEST.dbt_valid_to is null\n and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete')\n then update\n set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to\n\n when not matched\n and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert'\n then insert ({{ insert_cols_csv }})\n values ({{ insert_cols_csv }})\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0623295, + "supported_languages": null + }, + 
"macro.dbt.materialization_snapshot_default": { + "name": "materialization_snapshot_default", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/snapshot.sql", + "original_file_path": "macros/materializations/snapshots/snapshot.sql", + "unique_id": "macro.dbt.materialization_snapshot_default", + "macro_sql": "{% materialization snapshot, default %}\n {%- set config = model['config'] -%}\n\n {%- set target_table = model.get('alias', model.get('name')) -%}\n\n {%- set strategy_name = config.get('strategy') -%}\n {%- set unique_key = config.get('unique_key') %}\n -- grab current tables grants config for comparision later on\n {%- set grant_config = config.get('grants') -%}\n\n {% set target_relation_exists, target_relation = get_or_create_relation(\n database=model.database,\n schema=model.schema,\n identifier=target_table,\n type='table') -%}\n\n {%- if not target_relation.is_table -%}\n {% do exceptions.relation_wrong_type(target_relation, 'table') %}\n {%- endif -%}\n\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {% set strategy_macro = strategy_dispatch(strategy_name) %}\n {% set strategy = strategy_macro(model, \"snapshotted_data\", \"source_data\", config, target_relation_exists) %}\n\n {% if not target_relation_exists %}\n\n {% set build_sql = build_snapshot_table(strategy, model['compiled_code']) %}\n {% set final_sql = create_table_as(False, target_relation, build_sql) %}\n\n {% else %}\n\n {{ adapter.valid_snapshot_target(target_relation) }}\n\n {% set staging_table = build_snapshot_staging_table(strategy, sql, target_relation) %}\n\n -- this may no-op if the database does not require column expansion\n {% do adapter.expand_target_column_types(from_relation=staging_table,\n to_relation=target_relation) %}\n\n {% set missing_columns = adapter.get_missing_columns(staging_table, target_relation)\n | rejectattr('name', 'equalto', 'dbt_change_type')\n | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')\n | rejectattr('name', 'equalto', 'dbt_unique_key')\n | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')\n | list %}\n\n {% do create_columns(target_relation, missing_columns) %}\n\n {% set source_columns = adapter.get_columns_in_relation(staging_table)\n | rejectattr('name', 'equalto', 'dbt_change_type')\n | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')\n | rejectattr('name', 'equalto', 'dbt_unique_key')\n | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')\n | list %}\n\n {% set quoted_source_columns = [] %}\n {% for column in source_columns %}\n {% do quoted_source_columns.append(adapter.quote(column.name)) %}\n {% endfor %}\n\n {% set final_sql = snapshot_merge_sql(\n target = target_relation,\n source = staging_table,\n insert_cols = quoted_source_columns\n )\n %}\n\n {% endif %}\n\n {% call statement('main') %}\n {{ final_sql }}\n {% endcall %}\n\n {% set should_revoke = should_revoke(target_relation_exists, full_refresh_mode=False) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {% if not target_relation_exists %}\n {% do create_indexes(target_relation) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {{ adapter.commit() }}\n\n {% if staging_table is defined %}\n {% do post_snapshot(staging_table) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmaterialization 
%}", + "depends_on": { + "macros": [ + "macro.dbt.get_or_create_relation", + "macro.dbt.run_hooks", + "macro.dbt.strategy_dispatch", + "macro.dbt.build_snapshot_table", + "macro.dbt.create_table_as", + "macro.dbt.build_snapshot_staging_table", + "macro.dbt.create_columns", + "macro.dbt.snapshot_merge_sql", + "macro.dbt.statement", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants", + "macro.dbt.persist_docs", + "macro.dbt.create_indexes", + "macro.dbt.post_snapshot" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0699165, + "supported_languages": ["sql"] + }, + "macro.dbt.strategy_dispatch": { + "name": "strategy_dispatch", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/strategies.sql", + "original_file_path": "macros/materializations/snapshots/strategies.sql", + "unique_id": "macro.dbt.strategy_dispatch", + "macro_sql": "{% macro strategy_dispatch(name) -%}\n{% set original_name = name %}\n {% if '.' in name %}\n {% set package_name, name = name.split(\".\", 1) %}\n {% else %}\n {% set package_name = none %}\n {% endif %}\n\n {% if package_name is none %}\n {% set package_context = context %}\n {% elif package_name in context %}\n {% set package_context = context[package_name] %}\n {% else %}\n {% set error_msg %}\n Could not find package '{{package_name}}', called with '{{original_name}}'\n {% endset %}\n {{ exceptions.raise_compiler_error(error_msg | trim) }}\n {% endif %}\n\n {%- set search_name = 'snapshot_' ~ name ~ '_strategy' -%}\n\n {% if search_name not in package_context %}\n {% set error_msg %}\n The specified strategy macro '{{name}}' was not found in package '{{ package_name }}'\n {% endset %}\n {{ exceptions.raise_compiler_error(error_msg | trim) }}\n {% endif %}\n {{ return(package_context[search_name]) }}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0742855, + "supported_languages": null + }, + "macro.dbt.snapshot_hash_arguments": { + "name": "snapshot_hash_arguments", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/strategies.sql", + "original_file_path": "macros/materializations/snapshots/strategies.sql", + "unique_id": "macro.dbt.snapshot_hash_arguments", + "macro_sql": "{% macro snapshot_hash_arguments(args) -%}\n {{ adapter.dispatch('snapshot_hash_arguments', 'dbt')(args) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__snapshot_hash_arguments"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.074553, + "supported_languages": null + }, + "macro.dbt.default__snapshot_hash_arguments": { + "name": "default__snapshot_hash_arguments", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/strategies.sql", + "original_file_path": "macros/materializations/snapshots/strategies.sql", + "unique_id": "macro.dbt.default__snapshot_hash_arguments", + "macro_sql": "{% macro default__snapshot_hash_arguments(args) -%}\n md5({%- for arg in args -%}\n coalesce(cast({{ arg }} as varchar ), '')\n {% if not loop.last %} || '|' || {% endif %}\n {%- endfor -%})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + 
"description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0749283, + "supported_languages": null + }, + "macro.dbt.snapshot_timestamp_strategy": { + "name": "snapshot_timestamp_strategy", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/strategies.sql", + "original_file_path": "macros/materializations/snapshots/strategies.sql", + "unique_id": "macro.dbt.snapshot_timestamp_strategy", + "macro_sql": "{% macro snapshot_timestamp_strategy(node, snapshotted_rel, current_rel, config, target_exists) %}\n {% set primary_key = config['unique_key'] %}\n {% set updated_at = config['updated_at'] %}\n {% set invalidate_hard_deletes = config.get('invalidate_hard_deletes', false) %}\n\n {#/*\n The snapshot relation might not have an {{ updated_at }} value if the\n snapshot strategy is changed from `check` to `timestamp`. We\n should use a dbt-created column for the comparison in the snapshot\n table instead of assuming that the user-supplied {{ updated_at }}\n will be present in the historical data.\n\n See https://github.com/dbt-labs/dbt-core/issues/2350\n */ #}\n {% set row_changed_expr -%}\n ({{ snapshotted_rel }}.dbt_valid_from < {{ current_rel }}.{{ updated_at }})\n {%- endset %}\n\n {% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}\n\n {% do return({\n \"unique_key\": primary_key,\n \"updated_at\": updated_at,\n \"row_changed\": row_changed_expr,\n \"scd_id\": scd_id_expr,\n \"invalidate_hard_deletes\": invalidate_hard_deletes\n }) %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.snapshot_hash_arguments"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0760298, + "supported_languages": null + }, + "macro.dbt.snapshot_string_as_time": { + "name": "snapshot_string_as_time", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/strategies.sql", + "original_file_path": "macros/materializations/snapshots/strategies.sql", + "unique_id": "macro.dbt.snapshot_string_as_time", + "macro_sql": "{% macro snapshot_string_as_time(timestamp) -%}\n {{ adapter.dispatch('snapshot_string_as_time', 'dbt')(timestamp) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__snapshot_string_as_time"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0762947, + "supported_languages": null + }, + "macro.dbt.default__snapshot_string_as_time": { + "name": "default__snapshot_string_as_time", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/strategies.sql", + "original_file_path": "macros/materializations/snapshots/strategies.sql", + "unique_id": "macro.dbt.default__snapshot_string_as_time", + "macro_sql": "{% macro default__snapshot_string_as_time(timestamp) %}\n {% do exceptions.raise_not_implemented(\n 'snapshot_string_as_time macro not implemented for adapter '+adapter.type()\n ) %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0765827, + "supported_languages": null + }, + "macro.dbt.snapshot_check_all_get_existing_columns": { + "name": 
"snapshot_check_all_get_existing_columns", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/strategies.sql", + "original_file_path": "macros/materializations/snapshots/strategies.sql", + "unique_id": "macro.dbt.snapshot_check_all_get_existing_columns", + "macro_sql": "{% macro snapshot_check_all_get_existing_columns(node, target_exists, check_cols_config) -%}\n {%- if not target_exists -%}\n {#-- no table yet -> return whatever the query does --#}\n {{ return((false, query_columns)) }}\n {%- endif -%}\n\n {#-- handle any schema changes --#}\n {%- set target_relation = adapter.get_relation(database=node.database, schema=node.schema, identifier=node.alias) -%}\n\n {% if check_cols_config == 'all' %}\n {%- set query_columns = get_columns_in_query(node['compiled_code']) -%}\n\n {% elif check_cols_config is iterable and (check_cols_config | length) > 0 %}\n {#-- query for proper casing/quoting, to support comparison below --#}\n {%- set select_check_cols_from_target -%}\n {#-- N.B. The whitespace below is necessary to avoid edge case issue with comments --#}\n {#-- See: https://github.com/dbt-labs/dbt-core/issues/6781 --#}\n select {{ check_cols_config | join(', ') }} from (\n {{ node['compiled_code'] }}\n ) subq\n {%- endset -%}\n {% set query_columns = get_columns_in_query(select_check_cols_from_target) %}\n\n {% else %}\n {% do exceptions.raise_compiler_error(\"Invalid value for 'check_cols': \" ~ check_cols_config) %}\n {% endif %}\n\n {%- set existing_cols = adapter.get_columns_in_relation(target_relation) | map(attribute = 'name') | list -%}\n {%- set ns = namespace() -%} {#-- handle for-loop scoping with a namespace --#}\n {%- set ns.column_added = false -%}\n\n {%- set intersection = [] -%}\n {%- for col in query_columns -%}\n {%- if col in existing_cols -%}\n {%- do intersection.append(adapter.quote(col)) -%}\n {%- else -%}\n {% set ns.column_added = true %}\n {%- endif -%}\n {%- endfor -%}\n {{ return((ns.column_added, intersection)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_columns_in_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0787218, + "supported_languages": null + }, + "macro.dbt.snapshot_check_strategy": { + "name": "snapshot_check_strategy", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/strategies.sql", + "original_file_path": "macros/materializations/snapshots/strategies.sql", + "unique_id": "macro.dbt.snapshot_check_strategy", + "macro_sql": "{% macro snapshot_check_strategy(node, snapshotted_rel, current_rel, config, target_exists) %}\n {% set check_cols_config = config['check_cols'] %}\n {% set primary_key = config['unique_key'] %}\n {% set invalidate_hard_deletes = config.get('invalidate_hard_deletes', false) %}\n {% set updated_at = config.get('updated_at', snapshot_get_time()) %}\n\n {% set column_added = false %}\n\n {% set column_added, check_cols = snapshot_check_all_get_existing_columns(node, target_exists, check_cols_config) %}\n\n {%- set row_changed_expr -%}\n (\n {%- if column_added -%}\n {{ get_true_sql() }}\n {%- else -%}\n {%- for col in check_cols -%}\n {{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }}\n or\n (\n (({{ snapshotted_rel }}.{{ col }} is null) and not ({{ current_rel }}.{{ col }} is null))\n or\n ((not {{ snapshotted_rel }}.{{ col }} is null) and ({{ current_rel }}.{{ col }} is null))\n 
)\n {%- if not loop.last %} or {% endif -%}\n {%- endfor -%}\n {%- endif -%}\n )\n {%- endset %}\n\n {% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}\n\n {% do return({\n \"unique_key\": primary_key,\n \"updated_at\": updated_at,\n \"row_changed\": row_changed_expr,\n \"scd_id\": scd_id_expr,\n \"invalidate_hard_deletes\": invalidate_hard_deletes\n }) %}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.snapshot_get_time", + "macro.dbt.snapshot_check_all_get_existing_columns", + "macro.dbt.get_true_sql", + "macro.dbt.snapshot_hash_arguments" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0808053, + "supported_languages": null + }, + "macro.dbt.create_columns": { + "name": "create_columns", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.create_columns", + "macro_sql": "{% macro create_columns(relation, columns) %}\n {{ adapter.dispatch('create_columns', 'dbt')(relation, columns) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__create_columns"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.084671, + "supported_languages": null + }, + "macro.dbt.default__create_columns": { + "name": "default__create_columns", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.default__create_columns", + "macro_sql": "{% macro default__create_columns(relation, columns) %}\n {% for column in columns %}\n {% call statement() %}\n alter table {{ relation }} add column \"{{ column.name }}\" {{ column.data_type }};\n {% endcall %}\n {% endfor %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0851166, + "supported_languages": null + }, + "macro.dbt.post_snapshot": { + "name": "post_snapshot", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.post_snapshot", + "macro_sql": "{% macro post_snapshot(staging_relation) %}\n {{ adapter.dispatch('post_snapshot', 'dbt')(staging_relation) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__post_snapshot"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.085432, + "supported_languages": null + }, + "macro.dbt.default__post_snapshot": { + "name": "default__post_snapshot", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.default__post_snapshot", + "macro_sql": "{% macro default__post_snapshot(staging_relation) %}\n {# no-op #}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + 
"show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0855753, + "supported_languages": null + }, + "macro.dbt.get_true_sql": { + "name": "get_true_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.get_true_sql", + "macro_sql": "{% macro get_true_sql() %}\n {{ adapter.dispatch('get_true_sql', 'dbt')() }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_true_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0858002, + "supported_languages": null + }, + "macro.dbt.default__get_true_sql": { + "name": "default__get_true_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.default__get_true_sql", + "macro_sql": "{% macro default__get_true_sql() %}\n {{ return('TRUE') }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.085976, + "supported_languages": null + }, + "macro.dbt.snapshot_staging_table": { + "name": "snapshot_staging_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.snapshot_staging_table", + "macro_sql": "{% macro snapshot_staging_table(strategy, source_sql, target_relation) -%}\n {{ adapter.dispatch('snapshot_staging_table', 'dbt')(strategy, source_sql, target_relation) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__snapshot_staging_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0862942, + "supported_languages": null + }, + "macro.dbt.default__snapshot_staging_table": { + "name": "default__snapshot_staging_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.default__snapshot_staging_table", + "macro_sql": "{% macro default__snapshot_staging_table(strategy, source_sql, target_relation) -%}\n\n with snapshot_query as (\n\n {{ source_sql }}\n\n ),\n\n snapshotted_data as (\n\n select *,\n {{ strategy.unique_key }} as dbt_unique_key\n\n from {{ target_relation }}\n where dbt_valid_to is null\n\n ),\n\n insertions_source_data as (\n\n select\n *,\n {{ strategy.unique_key }} as dbt_unique_key,\n {{ strategy.updated_at }} as dbt_updated_at,\n {{ strategy.updated_at }} as dbt_valid_from,\n nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to,\n {{ strategy.scd_id }} as dbt_scd_id\n\n from snapshot_query\n ),\n\n updates_source_data as (\n\n select\n *,\n {{ strategy.unique_key }} as dbt_unique_key,\n {{ strategy.updated_at }} as dbt_updated_at,\n {{ strategy.updated_at }} as dbt_valid_from,\n {{ strategy.updated_at }} as dbt_valid_to\n\n from snapshot_query\n ),\n\n {%- if strategy.invalidate_hard_deletes %}\n\n 
deletes_source_data as (\n\n select\n *,\n {{ strategy.unique_key }} as dbt_unique_key\n from snapshot_query\n ),\n {% endif %}\n\n insertions as (\n\n select\n 'insert' as dbt_change_type,\n source_data.*\n\n from insertions_source_data as source_data\n left outer join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key\n where snapshotted_data.dbt_unique_key is null\n or (\n snapshotted_data.dbt_unique_key is not null\n and (\n {{ strategy.row_changed }}\n )\n )\n\n ),\n\n updates as (\n\n select\n 'update' as dbt_change_type,\n source_data.*,\n snapshotted_data.dbt_scd_id\n\n from updates_source_data as source_data\n join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key\n where (\n {{ strategy.row_changed }}\n )\n )\n\n {%- if strategy.invalidate_hard_deletes -%}\n ,\n\n deletes as (\n\n select\n 'delete' as dbt_change_type,\n source_data.*,\n {{ snapshot_get_time() }} as dbt_valid_from,\n {{ snapshot_get_time() }} as dbt_updated_at,\n {{ snapshot_get_time() }} as dbt_valid_to,\n snapshotted_data.dbt_scd_id\n\n from snapshotted_data\n left join deletes_source_data as source_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key\n where source_data.dbt_unique_key is null\n )\n {%- endif %}\n\n select * from insertions\n union all\n select * from updates\n {%- if strategy.invalidate_hard_deletes %}\n union all\n select * from deletes\n {%- endif %}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.snapshot_get_time"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0877678, + "supported_languages": null + }, + "macro.dbt.build_snapshot_table": { + "name": "build_snapshot_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.build_snapshot_table", + "macro_sql": "{% macro build_snapshot_table(strategy, sql) -%}\n {{ adapter.dispatch('build_snapshot_table', 'dbt')(strategy, sql) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__build_snapshot_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0880618, + "supported_languages": null + }, + "macro.dbt.default__build_snapshot_table": { + "name": "default__build_snapshot_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.default__build_snapshot_table", + "macro_sql": "{% macro default__build_snapshot_table(strategy, sql) %}\n\n select *,\n {{ strategy.scd_id }} as dbt_scd_id,\n {{ strategy.updated_at }} as dbt_updated_at,\n {{ strategy.updated_at }} as dbt_valid_from,\n nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to\n from (\n {{ sql }}\n ) sbq\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0884404, + "supported_languages": null + }, + "macro.dbt.build_snapshot_staging_table": { + "name": "build_snapshot_staging_table", + "resource_type": "macro", + "package_name": "dbt", + "path": 
"macros/materializations/snapshots/helpers.sql", + "original_file_path": "macros/materializations/snapshots/helpers.sql", + "unique_id": "macro.dbt.build_snapshot_staging_table", + "macro_sql": "{% macro build_snapshot_staging_table(strategy, sql, target_relation) %}\n {% set temp_relation = make_temp_relation(target_relation) %}\n\n {% set select = snapshot_staging_table(strategy, sql, target_relation) %}\n\n {% call statement('build_snapshot_staging_relation') %}\n {{ create_table_as(True, temp_relation, select) }}\n {% endcall %}\n\n {% do return(temp_relation) %}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.make_temp_relation", + "macro.dbt.snapshot_staging_table", + "macro.dbt.statement", + "macro.dbt.create_table_as" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0891027, + "supported_languages": null + }, + "macro.dbt.materialization_materialized_view_default": { + "name": "materialization_materialized_view_default", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/materialized_view.sql", + "original_file_path": "macros/materializations/models/materialized_view.sql", + "unique_id": "macro.dbt.materialization_materialized_view_default", + "macro_sql": "{% materialization materialized_view, default %}\n {% set existing_relation = load_cached_relation(this) %}\n {% set target_relation = this.incorporate(type=this.MaterializedView) %}\n {% set intermediate_relation = make_intermediate_relation(target_relation) %}\n {% set backup_relation_type = target_relation.MaterializedView if existing_relation is none else existing_relation.type %}\n {% set backup_relation = make_backup_relation(target_relation, backup_relation_type) %}\n\n {{ materialized_view_setup(backup_relation, intermediate_relation, pre_hooks) }}\n\n {% set build_sql = materialized_view_get_build_sql(existing_relation, target_relation, backup_relation, intermediate_relation) %}\n\n {% if build_sql == '' %}\n {{ materialized_view_execute_no_op(target_relation) }}\n {% else %}\n {{ materialized_view_execute_build_sql(build_sql, existing_relation, target_relation, post_hooks) }}\n {% endif %}\n\n {{ materialized_view_teardown(backup_relation, intermediate_relation, post_hooks) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmaterialization %}", + "depends_on": { + "macros": [ + "macro.dbt.load_cached_relation", + "macro.dbt.make_intermediate_relation", + "macro.dbt.make_backup_relation", + "macro.dbt.materialized_view_setup", + "macro.dbt.materialized_view_get_build_sql", + "macro.dbt.materialized_view_execute_no_op", + "macro.dbt.materialized_view_execute_build_sql", + "macro.dbt.materialized_view_teardown" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0943735, + "supported_languages": ["sql"] + }, + "macro.dbt.materialized_view_setup": { + "name": "materialized_view_setup", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/materialized_view.sql", + "original_file_path": "macros/materializations/models/materialized_view.sql", + "unique_id": "macro.dbt.materialized_view_setup", + "macro_sql": "{% macro materialized_view_setup(backup_relation, intermediate_relation, pre_hooks) %}\n\n -- backup_relation and intermediate_relation should not already exist in the database\n -- 
it's possible these exist because of a previous run that exited unexpectedly\n {% set preexisting_backup_relation = load_cached_relation(backup_relation) %}\n {% set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) %}\n\n -- drop the temp relations if they exist already in the database\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.load_cached_relation", + "macro.dbt.drop_relation_if_exists", + "macro.dbt.run_hooks" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0950365, + "supported_languages": null + }, + "macro.dbt.materialized_view_teardown": { + "name": "materialized_view_teardown", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/materialized_view.sql", + "original_file_path": "macros/materializations/models/materialized_view.sql", + "unique_id": "macro.dbt.materialized_view_teardown", + "macro_sql": "{% macro materialized_view_teardown(backup_relation, intermediate_relation, post_hooks) %}\n\n -- drop the temp relations if they exist to leave the database clean for the next run\n {{ drop_relation_if_exists(backup_relation) }}\n {{ drop_relation_if_exists(intermediate_relation) }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.drop_relation_if_exists", "macro.dbt.run_hooks"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.09542, + "supported_languages": null + }, + "macro.dbt.materialized_view_get_build_sql": { + "name": "materialized_view_get_build_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/materialized_view.sql", + "original_file_path": "macros/materializations/models/materialized_view.sql", + "unique_id": "macro.dbt.materialized_view_get_build_sql", + "macro_sql": "{% macro materialized_view_get_build_sql(existing_relation, target_relation, backup_relation, intermediate_relation) %}\n\n {% set full_refresh_mode = should_full_refresh() %}\n\n -- determine the scenario we're in: create, full_refresh, alter, refresh data\n {% if existing_relation is none %}\n {% set build_sql = get_create_materialized_view_as_sql(target_relation, sql) %}\n {% elif full_refresh_mode or not existing_relation.is_materialized_view %}\n {% set build_sql = get_replace_sql(existing_relation, target_relation, sql) %}\n {% else %}\n\n -- get config options\n {% set on_configuration_change = config.get('on_configuration_change') %}\n {% set configuration_changes = get_materialized_view_configuration_changes(existing_relation, config) %}\n\n {% if configuration_changes is none %}\n {% set build_sql = refresh_materialized_view(target_relation) %}\n\n {% elif on_configuration_change == 'apply' %}\n {% set build_sql = get_alter_materialized_view_as_sql(target_relation, configuration_changes, sql, existing_relation, backup_relation, intermediate_relation) %}\n {% elif on_configuration_change == 'continue' %}\n {% set build_sql = '' %}\n {{ exceptions.warn(\"Configuration changes were identified and `on_configuration_change` was set to `continue` for `\" ~ target_relation ~ \"`\") 
}}\n {% elif on_configuration_change == 'fail' %}\n {{ exceptions.raise_fail_fast_error(\"Configuration changes were identified and `on_configuration_change` was set to `fail` for `\" ~ target_relation ~ \"`\") }}\n\n {% else %}\n -- this only happens if the user provides a value other than `apply`, 'skip', 'fail'\n {{ exceptions.raise_compiler_error(\"Unexpected configuration scenario\") }}\n\n {% endif %}\n\n {% endif %}\n\n {% do return(build_sql) %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.should_full_refresh", + "macro.dbt.get_create_materialized_view_as_sql", + "macro.dbt.get_replace_sql", + "macro.dbt.get_materialized_view_configuration_changes", + "macro.dbt.refresh_materialized_view", + "macro.dbt.get_alter_materialized_view_as_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0973876, + "supported_languages": null + }, + "macro.dbt.materialized_view_execute_no_op": { + "name": "materialized_view_execute_no_op", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/materialized_view.sql", + "original_file_path": "macros/materializations/models/materialized_view.sql", + "unique_id": "macro.dbt.materialized_view_execute_no_op", + "macro_sql": "{% macro materialized_view_execute_no_op(target_relation) %}\n {% do store_raw_result(\n name=\"main\",\n message=\"skip \" ~ target_relation,\n code=\"skip\",\n rows_affected=\"-1\"\n ) %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0977635, + "supported_languages": null + }, + "macro.dbt.materialized_view_execute_build_sql": { + "name": "materialized_view_execute_build_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/materialized_view.sql", + "original_file_path": "macros/materializations/models/materialized_view.sql", + "unique_id": "macro.dbt.materialized_view_execute_build_sql", + "macro_sql": "{% macro materialized_view_execute_build_sql(build_sql, existing_relation, target_relation, post_hooks) %}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {% set grant_config = config.get('grants') %}\n\n {% call statement(name=\"main\") %}\n {{ build_sql }}\n {% endcall %}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {{ adapter.commit() }}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.run_hooks", + "macro.dbt.statement", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants", + "macro.dbt.persist_docs" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.0987308, + "supported_languages": null + }, + "macro.dbt.materialization_table_default": { + "name": "materialization_table_default", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/table.sql", + "original_file_path": "macros/materializations/models/table.sql", + "unique_id": "macro.dbt.materialization_table_default", + "macro_sql": "{% 
materialization table, default %}\n\n {%- set existing_relation = load_cached_relation(this) -%}\n {%- set target_relation = this.incorporate(type='table') %}\n {%- set intermediate_relation = make_intermediate_relation(target_relation) -%}\n -- the intermediate_relation should not already exist in the database; get_relation\n -- will return None in that case. Otherwise, we get a relation that we can drop\n -- later, before we try to use this name for the current operation\n {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%}\n /*\n See ../view/view.sql for more information about this relation.\n */\n {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%}\n {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}\n -- as above, the backup_relation should not already exist\n {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%}\n -- grab current tables grants config for comparision later on\n {% set grant_config = config.get('grants') %}\n\n -- drop the temp relations if they exist already in the database\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% call statement('main') -%}\n {{ get_create_table_as_sql(False, intermediate_relation, sql) }}\n {%- endcall %}\n\n -- cleanup\n {% if existing_relation is not none %}\n /* Do the equivalent of rename_if_exists. 'existing_relation' could have been dropped\n since the variable was first set. */\n {% set existing_relation = load_cached_relation(existing_relation) %}\n {% if existing_relation is not none %}\n {{ adapter.rename_relation(existing_relation, backup_relation) }}\n {% endif %}\n {% endif %}\n\n {{ adapter.rename_relation(intermediate_relation, target_relation) }}\n\n {% do create_indexes(target_relation) %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n -- `COMMIT` happens here\n {{ adapter.commit() }}\n\n -- finally, drop the existing/backup relation after the commit\n {{ drop_relation_if_exists(backup_relation) }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n{% endmaterialization %}", + "depends_on": { + "macros": [ + "macro.dbt.load_cached_relation", + "macro.dbt.make_intermediate_relation", + "macro.dbt.make_backup_relation", + "macro.dbt.drop_relation_if_exists", + "macro.dbt.run_hooks", + "macro.dbt.statement", + "macro.dbt.get_create_table_as_sql", + "macro.dbt.create_indexes", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants", + "macro.dbt.persist_docs" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.102702, + "supported_languages": ["sql"] + }, + "macro.dbt.materialization_view_default": { + "name": "materialization_view_default", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/view.sql", + "original_file_path": "macros/materializations/models/view.sql", + "unique_id": 
"macro.dbt.materialization_view_default", + "macro_sql": "{%- materialization view, default -%}\n\n {%- set existing_relation = load_cached_relation(this) -%}\n {%- set target_relation = this.incorporate(type='view') -%}\n {%- set intermediate_relation = make_intermediate_relation(target_relation) -%}\n\n -- the intermediate_relation should not already exist in the database; get_relation\n -- will return None in that case. Otherwise, we get a relation that we can drop\n -- later, before we try to use this name for the current operation\n {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%}\n /*\n This relation (probably) doesn't exist yet. If it does exist, it's a leftover from\n a previous run, and we're going to try to drop it immediately. At the end of this\n materialization, we're going to rename the \"existing_relation\" to this identifier,\n and then we're going to drop it. In order to make sure we run the correct one of:\n - drop view ...\n - drop table ...\n\n We need to set the type of this relation to be the type of the existing_relation, if it exists,\n or else \"view\" as a sane default if it does not. Note that if the existing_relation does not\n exist, then there is nothing to move out of the way and subsequentally drop. In that case,\n this relation will be effectively unused.\n */\n {%- set backup_relation_type = 'view' if existing_relation is none else existing_relation.type -%}\n {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}\n -- as above, the backup_relation should not already exist\n {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%}\n -- grab current tables grants config for comparision later on\n {% set grant_config = config.get('grants') %}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- drop the temp relations if they exist already in the database\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% call statement('main') -%}\n {{ get_create_view_as_sql(intermediate_relation, sql) }}\n {%- endcall %}\n\n -- cleanup\n -- move the existing view out of the way\n {% if existing_relation is not none %}\n /* Do the equivalent of rename_if_exists. 'existing_relation' could have been dropped\n since the variable was first set. 
*/\n {% set existing_relation = load_cached_relation(existing_relation) %}\n {% if existing_relation is not none %}\n {{ adapter.rename_relation(existing_relation, backup_relation) }}\n {% endif %}\n {% endif %}\n {{ adapter.rename_relation(intermediate_relation, target_relation) }}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {{ adapter.commit() }}\n\n {{ drop_relation_if_exists(backup_relation) }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{%- endmaterialization -%}", + "depends_on": { + "macros": [ + "macro.dbt.load_cached_relation", + "macro.dbt.make_intermediate_relation", + "macro.dbt.make_backup_relation", + "macro.dbt.run_hooks", + "macro.dbt.drop_relation_if_exists", + "macro.dbt.statement", + "macro.dbt.get_create_view_as_sql", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants", + "macro.dbt.persist_docs" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1066535, + "supported_languages": ["sql"] + }, + "macro.dbt.incremental_validate_on_schema_change": { + "name": "incremental_validate_on_schema_change", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/on_schema_change.sql", + "original_file_path": "macros/materializations/models/incremental/on_schema_change.sql", + "unique_id": "macro.dbt.incremental_validate_on_schema_change", + "macro_sql": "{% macro incremental_validate_on_schema_change(on_schema_change, default='ignore') %}\n\n {% if on_schema_change not in ['sync_all_columns', 'append_new_columns', 'fail', 'ignore'] %}\n\n {% set log_message = 'Invalid value for on_schema_change (%s) specified. Setting default value of %s.' 
% (on_schema_change, default) %}\n {% do log(log_message) %}\n\n {{ return(default) }}\n\n {% else %}\n\n {{ return(on_schema_change) }}\n\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1121187, + "supported_languages": null + }, + "macro.dbt.check_for_schema_changes": { + "name": "check_for_schema_changes", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/on_schema_change.sql", + "original_file_path": "macros/materializations/models/incremental/on_schema_change.sql", + "unique_id": "macro.dbt.check_for_schema_changes", + "macro_sql": "{% macro check_for_schema_changes(source_relation, target_relation) %}\n\n {% set schema_changed = False %}\n\n {%- set source_columns = adapter.get_columns_in_relation(source_relation) -%}\n {%- set target_columns = adapter.get_columns_in_relation(target_relation) -%}\n {%- set source_not_in_target = diff_columns(source_columns, target_columns) -%}\n {%- set target_not_in_source = diff_columns(target_columns, source_columns) -%}\n\n {% set new_target_types = diff_column_data_types(source_columns, target_columns) %}\n\n {% if source_not_in_target != [] %}\n {% set schema_changed = True %}\n {% elif target_not_in_source != [] or new_target_types != [] %}\n {% set schema_changed = True %}\n {% elif new_target_types != [] %}\n {% set schema_changed = True %}\n {% endif %}\n\n {% set changes_dict = {\n 'schema_changed': schema_changed,\n 'source_not_in_target': source_not_in_target,\n 'target_not_in_source': target_not_in_source,\n 'source_columns': source_columns,\n 'target_columns': target_columns,\n 'new_target_types': new_target_types\n } %}\n\n {% set msg %}\n In {{ target_relation }}:\n Schema changed: {{ schema_changed }}\n Source columns not in target: {{ source_not_in_target }}\n Target columns not in source: {{ target_not_in_source }}\n New column types: {{ new_target_types }}\n {% endset %}\n\n {% do log(msg) %}\n\n {{ return(changes_dict) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.diff_columns", "macro.dbt.diff_column_data_types"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1139607, + "supported_languages": null + }, + "macro.dbt.sync_column_schemas": { + "name": "sync_column_schemas", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/on_schema_change.sql", + "original_file_path": "macros/materializations/models/incremental/on_schema_change.sql", + "unique_id": "macro.dbt.sync_column_schemas", + "macro_sql": "{% macro sync_column_schemas(on_schema_change, target_relation, schema_changes_dict) %}\n\n {%- set add_to_target_arr = schema_changes_dict['source_not_in_target'] -%}\n\n {%- if on_schema_change == 'append_new_columns'-%}\n {%- if add_to_target_arr | length > 0 -%}\n {%- do alter_relation_add_remove_columns(target_relation, add_to_target_arr, none) -%}\n {%- endif -%}\n\n {% elif on_schema_change == 'sync_all_columns' %}\n {%- set remove_from_target_arr = schema_changes_dict['target_not_in_source'] -%}\n {%- set new_target_types = schema_changes_dict['new_target_types'] -%}\n\n {% if add_to_target_arr | length > 0 or remove_from_target_arr | length > 0 %}\n {%- do alter_relation_add_remove_columns(target_relation, 
add_to_target_arr, remove_from_target_arr) -%}\n {% endif %}\n\n {% if new_target_types != [] %}\n {% for ntt in new_target_types %}\n {% set column_name = ntt['column_name'] %}\n {% set new_type = ntt['new_type'] %}\n {% do alter_column_type(target_relation, column_name, new_type) %}\n {% endfor %}\n {% endif %}\n\n {% endif %}\n\n {% set schema_change_message %}\n In {{ target_relation }}:\n Schema change approach: {{ on_schema_change }}\n Columns added: {{ add_to_target_arr }}\n Columns removed: {{ remove_from_target_arr }}\n Data types changed: {{ new_target_types }}\n {% endset %}\n\n {% do log(schema_change_message) %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.alter_relation_add_remove_columns", + "macro.dbt.alter_column_type" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1159163, + "supported_languages": null + }, + "macro.dbt.process_schema_changes": { + "name": "process_schema_changes", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/on_schema_change.sql", + "original_file_path": "macros/materializations/models/incremental/on_schema_change.sql", + "unique_id": "macro.dbt.process_schema_changes", + "macro_sql": "{% macro process_schema_changes(on_schema_change, source_relation, target_relation) %}\n\n {% if on_schema_change == 'ignore' %}\n\n {{ return({}) }}\n\n {% else %}\n\n {% set schema_changes_dict = check_for_schema_changes(source_relation, target_relation) %}\n\n {% if schema_changes_dict['schema_changed'] %}\n\n {% if on_schema_change == 'fail' %}\n\n {% set fail_msg %}\n The source and target schemas on this incremental model are out of sync!\n They can be reconciled in several ways:\n - set the `on_schema_change` config to either append_new_columns or sync_all_columns, depending on your situation.\n - Re-run the incremental model with `full_refresh: True` to update the target schema.\n - update the schema manually and re-run the process.\n\n Additional troubleshooting context:\n Source columns not in target: {{ schema_changes_dict['source_not_in_target'] }}\n Target columns not in source: {{ schema_changes_dict['target_not_in_source'] }}\n New column types: {{ schema_changes_dict['new_target_types'] }}\n {% endset %}\n\n {% do exceptions.raise_compiler_error(fail_msg) %}\n\n {# -- unless we ignore, run the sync operation per the config #}\n {% else %}\n\n {% do sync_column_schemas(on_schema_change, target_relation, schema_changes_dict) %}\n\n {% endif %}\n\n {% endif %}\n\n {{ return(schema_changes_dict['source_columns']) }}\n\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.check_for_schema_changes", + "macro.dbt.sync_column_schemas" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1172094, + "supported_languages": null + }, + "macro.dbt.get_merge_sql": { + "name": "get_merge_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/merge.sql", + "original_file_path": "macros/materializations/models/incremental/merge.sql", + "unique_id": "macro.dbt.get_merge_sql", + "macro_sql": "{% macro get_merge_sql(target, source, unique_key, dest_columns, incremental_predicates=none) -%}\n -- back compat for old kwarg name\n {% set incremental_predicates = kwargs.get('predicates', 
incremental_predicates) %}\n {{ adapter.dispatch('get_merge_sql', 'dbt')(target, source, unique_key, dest_columns, incremental_predicates) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_merge_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1233573, + "supported_languages": null + }, + "macro.dbt.default__get_merge_sql": { + "name": "default__get_merge_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/merge.sql", + "original_file_path": "macros/materializations/models/incremental/merge.sql", + "unique_id": "macro.dbt.default__get_merge_sql", + "macro_sql": "{% macro default__get_merge_sql(target, source, unique_key, dest_columns, incremental_predicates=none) -%}\n {%- set predicates = [] if incremental_predicates is none else [] + incremental_predicates -%}\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n {%- set merge_update_columns = config.get('merge_update_columns') -%}\n {%- set merge_exclude_columns = config.get('merge_exclude_columns') -%}\n {%- set update_columns = get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {% if unique_key %}\n {% if unique_key is sequence and unique_key is not mapping and unique_key is not string %}\n {% for key in unique_key %}\n {% set this_key_match %}\n DBT_INTERNAL_SOURCE.{{ key }} = DBT_INTERNAL_DEST.{{ key }}\n {% endset %}\n {% do predicates.append(this_key_match) %}\n {% endfor %}\n {% else %}\n {% set unique_key_match %}\n DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }}\n {% endset %}\n {% do predicates.append(unique_key_match) %}\n {% endif %}\n {% else %}\n {% do predicates.append('FALSE') %}\n {% endif %}\n\n {{ sql_header if sql_header is not none }}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n using {{ source }} as DBT_INTERNAL_SOURCE\n on {{\"(\" ~ predicates | join(\") and (\") ~ \")\"}}\n\n {% if unique_key %}\n when matched then update set\n {% for column_name in update_columns -%}\n {{ column_name }} = DBT_INTERNAL_SOURCE.{{ column_name }}\n {%- if not loop.last %}, {%- endif %}\n {%- endfor %}\n {% endif %}\n\n when not matched then insert\n ({{ dest_cols_csv }})\n values\n ({{ dest_cols_csv }})\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.get_quoted_csv", + "macro.dbt.get_merge_update_columns" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1259441, + "supported_languages": null + }, + "macro.dbt.get_delete_insert_merge_sql": { + "name": "get_delete_insert_merge_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/merge.sql", + "original_file_path": "macros/materializations/models/incremental/merge.sql", + "unique_id": "macro.dbt.get_delete_insert_merge_sql", + "macro_sql": "{% macro get_delete_insert_merge_sql(target, source, unique_key, dest_columns, incremental_predicates) -%}\n {{ adapter.dispatch('get_delete_insert_merge_sql', 'dbt')(target, source, unique_key, dest_columns, incremental_predicates) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__get_delete_insert_merge_sql"] + }, + "description": "", + "meta": {}, + "docs": { + 
"show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1263351, + "supported_languages": null + }, + "macro.dbt.default__get_delete_insert_merge_sql": { + "name": "default__get_delete_insert_merge_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/merge.sql", + "original_file_path": "macros/materializations/models/incremental/merge.sql", + "unique_id": "macro.dbt.default__get_delete_insert_merge_sql", + "macro_sql": "{% macro default__get_delete_insert_merge_sql(target, source, unique_key, dest_columns, incremental_predicates) -%}\n\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n\n {% if unique_key %}\n {% if unique_key is sequence and unique_key is not string %}\n delete from {{target }}\n using {{ source }}\n where (\n {% for key in unique_key %}\n {{ source }}.{{ key }} = {{ target }}.{{ key }}\n {{ \"and \" if not loop.last}}\n {% endfor %}\n {% if incremental_predicates %}\n {% for predicate in incremental_predicates %}\n and {{ predicate }}\n {% endfor %}\n {% endif %}\n );\n {% else %}\n delete from {{ target }}\n where (\n {{ unique_key }}) in (\n select ({{ unique_key }})\n from {{ source }}\n )\n {%- if incremental_predicates %}\n {% for predicate in incremental_predicates %}\n and {{ predicate }}\n {% endfor %}\n {%- endif -%};\n\n {% endif %}\n {% endif %}\n\n insert into {{ target }} ({{ dest_cols_csv }})\n (\n select {{ dest_cols_csv }}\n from {{ source }}\n )\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_quoted_csv"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1279113, + "supported_languages": null + }, + "macro.dbt.get_insert_overwrite_merge_sql": { + "name": "get_insert_overwrite_merge_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/merge.sql", + "original_file_path": "macros/materializations/models/incremental/merge.sql", + "unique_id": "macro.dbt.get_insert_overwrite_merge_sql", + "macro_sql": "{% macro get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header=false) -%}\n {{ adapter.dispatch('get_insert_overwrite_merge_sql', 'dbt')(target, source, dest_columns, predicates, include_sql_header) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_insert_overwrite_merge_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1283085, + "supported_languages": null + }, + "macro.dbt.default__get_insert_overwrite_merge_sql": { + "name": "default__get_insert_overwrite_merge_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/merge.sql", + "original_file_path": "macros/materializations/models/incremental/merge.sql", + "unique_id": "macro.dbt.default__get_insert_overwrite_merge_sql", + "macro_sql": "{% macro default__get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header) -%}\n {#-- The only time include_sql_header is True: --#}\n {#-- BigQuery + insert_overwrite strategy + \"static\" partitions config --#}\n {#-- We should consider including the sql header at the materialization level instead --#}\n\n {%- set predicates = [] if predicates is none else [] + 
predicates -%}\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none and include_sql_header }}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n using {{ source }} as DBT_INTERNAL_SOURCE\n on FALSE\n\n when not matched by source\n {% if predicates %} and {{ predicates | join(' and ') }} {% endif %}\n then delete\n\n when not matched then insert\n ({{ dest_cols_csv }})\n values\n ({{ dest_cols_csv }})\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_quoted_csv"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1293163, + "supported_languages": null + }, + "macro.dbt.materialization_incremental_default": { + "name": "materialization_incremental_default", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/incremental.sql", + "original_file_path": "macros/materializations/models/incremental/incremental.sql", + "unique_id": "macro.dbt.materialization_incremental_default", + "macro_sql": "{% materialization incremental, default -%}\n\n -- relations\n {%- set existing_relation = load_cached_relation(this) -%}\n {%- set target_relation = this.incorporate(type='table') -%}\n {%- set temp_relation = make_temp_relation(target_relation)-%}\n {%- set intermediate_relation = make_intermediate_relation(target_relation)-%}\n {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%}\n {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}\n\n -- configs\n {%- set unique_key = config.get('unique_key') -%}\n {%- set full_refresh_mode = (should_full_refresh() or existing_relation.is_view) -%}\n {%- set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') -%}\n\n -- the temp_ and backup_ relations should not already exist in the database; get_relation\n -- will return None in that case. Otherwise, we get a relation that we can drop\n -- later, before we try to use this name for the current operation. This has to happen before\n -- BEGIN, in a separate transaction\n {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation)-%}\n {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%}\n -- grab current tables grants config for comparision later on\n {% set grant_config = config.get('grants') %}\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {% set to_drop = [] %}\n\n {% if existing_relation is none %}\n {% set build_sql = get_create_table_as_sql(False, target_relation, sql) %}\n {% elif full_refresh_mode %}\n {% set build_sql = get_create_table_as_sql(False, intermediate_relation, sql) %}\n {% set need_swap = true %}\n {% else %}\n {% do run_query(get_create_table_as_sql(True, temp_relation, sql)) %}\n {% do adapter.expand_target_column_types(\n from_relation=temp_relation,\n to_relation=target_relation) %}\n {#-- Process schema changes. Returns dict of changes if successful. 
Use source columns for upserting/merging --#}\n {% set dest_columns = process_schema_changes(on_schema_change, temp_relation, existing_relation) %}\n {% if not dest_columns %}\n {% set dest_columns = adapter.get_columns_in_relation(existing_relation) %}\n {% endif %}\n\n {#-- Get the incremental_strategy, the macro to use for the strategy, and build the sql --#}\n {% set incremental_strategy = config.get('incremental_strategy') or 'default' %}\n {% set incremental_predicates = config.get('predicates', none) or config.get('incremental_predicates', none) %}\n {% set strategy_sql_macro_func = adapter.get_incremental_strategy_macro(context, incremental_strategy) %}\n {% set strategy_arg_dict = ({'target_relation': target_relation, 'temp_relation': temp_relation, 'unique_key': unique_key, 'dest_columns': dest_columns, 'incremental_predicates': incremental_predicates }) %}\n {% set build_sql = strategy_sql_macro_func(strategy_arg_dict) %}\n\n {% endif %}\n\n {% call statement(\"main\") %}\n {{ build_sql }}\n {% endcall %}\n\n {% if need_swap %}\n {% do adapter.rename_relation(target_relation, backup_relation) %}\n {% do adapter.rename_relation(intermediate_relation, target_relation) %}\n {% do to_drop.append(backup_relation) %}\n {% endif %}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {% if existing_relation is none or existing_relation.is_view or should_full_refresh() %}\n {% do create_indexes(target_relation) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n -- `COMMIT` happens here\n {% do adapter.commit() %}\n\n {% for rel in to_drop %}\n {% do adapter.drop_relation(rel) %}\n {% endfor %}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{%- endmaterialization %}", + "depends_on": { + "macros": [ + "macro.dbt.load_cached_relation", + "macro.dbt.make_temp_relation", + "macro.dbt.make_intermediate_relation", + "macro.dbt.make_backup_relation", + "macro.dbt.should_full_refresh", + "macro.dbt.incremental_validate_on_schema_change", + "macro.dbt.drop_relation_if_exists", + "macro.dbt.run_hooks", + "macro.dbt.get_create_table_as_sql", + "macro.dbt.run_query", + "macro.dbt.process_schema_changes", + "macro.dbt.statement", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants", + "macro.dbt.persist_docs", + "macro.dbt.create_indexes" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1362484, + "supported_languages": ["sql"] + }, + "macro.dbt.get_incremental_append_sql": { + "name": "get_incremental_append_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.get_incremental_append_sql", + "macro_sql": "{% macro get_incremental_append_sql(arg_dict) %}\n\n {{ return(adapter.dispatch('get_incremental_append_sql', 'dbt')(arg_dict)) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_incremental_append_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1375656, + "supported_languages": null + }, + 
"macro.dbt.default__get_incremental_append_sql": { + "name": "default__get_incremental_append_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.default__get_incremental_append_sql", + "macro_sql": "{% macro default__get_incremental_append_sql(arg_dict) %}\n\n {% do return(get_insert_into_sql(arg_dict[\"target_relation\"], arg_dict[\"temp_relation\"], arg_dict[\"dest_columns\"])) %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_insert_into_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.137946, + "supported_languages": null + }, + "macro.dbt.get_incremental_delete_insert_sql": { + "name": "get_incremental_delete_insert_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.get_incremental_delete_insert_sql", + "macro_sql": "{% macro get_incremental_delete_insert_sql(arg_dict) %}\n\n {{ return(adapter.dispatch('get_incremental_delete_insert_sql', 'dbt')(arg_dict)) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_incremental_delete_insert_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1382306, + "supported_languages": null + }, + "macro.dbt.default__get_incremental_delete_insert_sql": { + "name": "default__get_incremental_delete_insert_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.default__get_incremental_delete_insert_sql", + "macro_sql": "{% macro default__get_incremental_delete_insert_sql(arg_dict) %}\n\n {% do return(get_delete_insert_merge_sql(arg_dict[\"target_relation\"], arg_dict[\"temp_relation\"], arg_dict[\"unique_key\"], arg_dict[\"dest_columns\"], arg_dict[\"incremental_predicates\"])) %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_delete_insert_merge_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1386573, + "supported_languages": null + }, + "macro.dbt.get_incremental_merge_sql": { + "name": "get_incremental_merge_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.get_incremental_merge_sql", + "macro_sql": "{% macro get_incremental_merge_sql(arg_dict) %}\n\n {{ return(adapter.dispatch('get_incremental_merge_sql', 'dbt')(arg_dict)) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_incremental_merge_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.138932, + "supported_languages": null + }, + "macro.dbt.default__get_incremental_merge_sql": { + "name": 
"default__get_incremental_merge_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.default__get_incremental_merge_sql", + "macro_sql": "{% macro default__get_incremental_merge_sql(arg_dict) %}\n\n {% do return(get_merge_sql(arg_dict[\"target_relation\"], arg_dict[\"temp_relation\"], arg_dict[\"unique_key\"], arg_dict[\"dest_columns\"], arg_dict[\"incremental_predicates\"])) %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_merge_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1394079, + "supported_languages": null + }, + "macro.dbt.get_incremental_insert_overwrite_sql": { + "name": "get_incremental_insert_overwrite_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.get_incremental_insert_overwrite_sql", + "macro_sql": "{% macro get_incremental_insert_overwrite_sql(arg_dict) %}\n\n {{ return(adapter.dispatch('get_incremental_insert_overwrite_sql', 'dbt')(arg_dict)) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_incremental_insert_overwrite_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.139687, + "supported_languages": null + }, + "macro.dbt.default__get_incremental_insert_overwrite_sql": { + "name": "default__get_incremental_insert_overwrite_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.default__get_incremental_insert_overwrite_sql", + "macro_sql": "{% macro default__get_incremental_insert_overwrite_sql(arg_dict) %}\n\n {% do return(get_insert_overwrite_merge_sql(arg_dict[\"target_relation\"], arg_dict[\"temp_relation\"], arg_dict[\"dest_columns\"], arg_dict[\"incremental_predicates\"])) %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_insert_overwrite_merge_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1400712, + "supported_languages": null + }, + "macro.dbt.get_incremental_default_sql": { + "name": "get_incremental_default_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.get_incremental_default_sql", + "macro_sql": "{% macro get_incremental_default_sql(arg_dict) %}\n\n {{ return(adapter.dispatch('get_incremental_default_sql', 'dbt')(arg_dict)) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__get_incremental_default_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1403382, + "supported_languages": null + }, + "macro.dbt.default__get_incremental_default_sql": { + 
"name": "default__get_incremental_default_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.default__get_incremental_default_sql", + "macro_sql": "{% macro default__get_incremental_default_sql(arg_dict) %}\n\n {% do return(get_incremental_append_sql(arg_dict)) %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_incremental_append_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.140572, + "supported_languages": null + }, + "macro.dbt.get_insert_into_sql": { + "name": "get_insert_into_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/strategies.sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "unique_id": "macro.dbt.get_insert_into_sql", + "macro_sql": "{% macro get_insert_into_sql(target_relation, temp_relation, dest_columns) %}\n\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n\n insert into {{ target_relation }} ({{ dest_cols_csv }})\n (\n select {{ dest_cols_csv }}\n from {{ temp_relation }}\n )\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_quoted_csv"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.141023, + "supported_languages": null + }, + "macro.dbt.is_incremental": { + "name": "is_incremental", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/is_incremental.sql", + "original_file_path": "macros/materializations/models/incremental/is_incremental.sql", + "unique_id": "macro.dbt.is_incremental", + "macro_sql": "{% macro is_incremental() %}\n {#-- do not run introspective queries in parsing #}\n {% if not execute %}\n {{ return(False) }}\n {% else %}\n {% set relation = adapter.get_relation(this.database, this.schema, this.table) %}\n {{ return(relation is not none\n and relation.type == 'table'\n and model.config.materialized == 'incremental'\n and not should_full_refresh()) }}\n {% endif %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.should_full_refresh"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.141959, + "supported_languages": null + }, + "macro.dbt.get_quoted_csv": { + "name": "get_quoted_csv", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/column_helpers.sql", + "original_file_path": "macros/materializations/models/incremental/column_helpers.sql", + "unique_id": "macro.dbt.get_quoted_csv", + "macro_sql": "{% macro get_quoted_csv(column_names) %}\n\n {% set quoted = [] %}\n {% for col in column_names -%}\n {%- do quoted.append(adapter.quote(col)) -%}\n {%- endfor %}\n\n {%- set dest_cols_csv = quoted | join(', ') -%}\n {{ return(dest_cols_csv) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1437864, + "supported_languages": null + }, + "macro.dbt.diff_columns": { + 
"name": "diff_columns", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/column_helpers.sql", + "original_file_path": "macros/materializations/models/incremental/column_helpers.sql", + "unique_id": "macro.dbt.diff_columns", + "macro_sql": "{% macro diff_columns(source_columns, target_columns) %}\n\n {% set result = [] %}\n {% set source_names = source_columns | map(attribute = 'column') | list %}\n {% set target_names = target_columns | map(attribute = 'column') | list %}\n\n {# --check whether the name attribute exists in the target - this does not perform a data type check #}\n {% for sc in source_columns %}\n {% if sc.name not in target_names %}\n {{ result.append(sc) }}\n {% endif %}\n {% endfor %}\n\n {{ return(result) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1446285, + "supported_languages": null + }, + "macro.dbt.diff_column_data_types": { + "name": "diff_column_data_types", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/column_helpers.sql", + "original_file_path": "macros/materializations/models/incremental/column_helpers.sql", + "unique_id": "macro.dbt.diff_column_data_types", + "macro_sql": "{% macro diff_column_data_types(source_columns, target_columns) %}\n\n {% set result = [] %}\n {% for sc in source_columns %}\n {% set tc = target_columns | selectattr(\"name\", \"equalto\", sc.name) | list | first %}\n {% if tc %}\n {% if sc.data_type != tc.data_type and not sc.can_expand_to(other_column=tc) %}\n {{ result.append( { 'column_name': tc.name, 'new_type': sc.data_type } ) }}\n {% endif %}\n {% endif %}\n {% endfor %}\n\n {{ return(result) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1457114, + "supported_languages": null + }, + "macro.dbt.get_merge_update_columns": { + "name": "get_merge_update_columns", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/column_helpers.sql", + "original_file_path": "macros/materializations/models/incremental/column_helpers.sql", + "unique_id": "macro.dbt.get_merge_update_columns", + "macro_sql": "{% macro get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns) %}\n {{ return(adapter.dispatch('get_merge_update_columns', 'dbt')(merge_update_columns, merge_exclude_columns, dest_columns)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_merge_update_columns"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1461127, + "supported_languages": null + }, + "macro.dbt.default__get_merge_update_columns": { + "name": "default__get_merge_update_columns", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/incremental/column_helpers.sql", + "original_file_path": "macros/materializations/models/incremental/column_helpers.sql", + "unique_id": "macro.dbt.default__get_merge_update_columns", + "macro_sql": "{% macro default__get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns) %}\n {%- set default_cols = 
dest_columns | map(attribute=\"quoted\") | list -%}\n\n {%- if merge_update_columns and merge_exclude_columns -%}\n {{ exceptions.raise_compiler_error(\n 'Model cannot specify merge_update_columns and merge_exclude_columns. Please update model to use only one config'\n )}}\n {%- elif merge_update_columns -%}\n {%- set update_columns = merge_update_columns -%}\n {%- elif merge_exclude_columns -%}\n {%- set update_columns = [] -%}\n {%- for column in dest_columns -%}\n {% if column.column | lower not in merge_exclude_columns | map(\"lower\") | list %}\n {%- do update_columns.append(column.quoted) -%}\n {% endif %}\n {%- endfor -%}\n {%- else -%}\n {%- set update_columns = default_cols -%}\n {%- endif -%}\n\n {{ return(update_columns) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1471882, + "supported_languages": null + }, + "macro.dbt.can_clone_table": { + "name": "can_clone_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/clone/can_clone_table.sql", + "original_file_path": "macros/materializations/models/clone/can_clone_table.sql", + "unique_id": "macro.dbt.can_clone_table", + "macro_sql": "{% macro can_clone_table() %}\n {{ return(adapter.dispatch('can_clone_table', 'dbt')()) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__can_clone_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1475563, + "supported_languages": null + }, + "macro.dbt.default__can_clone_table": { + "name": "default__can_clone_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/clone/can_clone_table.sql", + "original_file_path": "macros/materializations/models/clone/can_clone_table.sql", + "unique_id": "macro.dbt.default__can_clone_table", + "macro_sql": "{% macro default__can_clone_table() %}\n {{ return(False) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1477294, + "supported_languages": null + }, + "macro.dbt.materialization_clone_default": { + "name": "materialization_clone_default", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/clone/clone.sql", + "original_file_path": "macros/materializations/models/clone/clone.sql", + "unique_id": "macro.dbt.materialization_clone_default", + "macro_sql": "{%- materialization clone, default -%}\n\n {%- set relations = {'relations': []} -%}\n\n {%- if not defer_relation -%}\n -- nothing to do\n {{ log(\"No relation found in state manifest for \" ~ model.unique_id, info=True) }}\n {{ return(relations) }}\n {%- endif -%}\n\n {%- set existing_relation = load_cached_relation(this) -%}\n\n {%- if existing_relation and not flags.FULL_REFRESH -%}\n -- noop!\n {{ log(\"Relation \" ~ existing_relation ~ \" already exists\", info=True) }}\n {{ return(relations) }}\n {%- endif -%}\n\n {%- set other_existing_relation = load_cached_relation(defer_relation) -%}\n\n -- If this is a database that can do zero-copy cloning of tables, and the other relation is a table, then this will be a table\n -- Otherwise, this will be a view\n\n {% set can_clone_table = 
can_clone_table() %}\n\n {%- if other_existing_relation and other_existing_relation.type == 'table' and can_clone_table -%}\n\n {%- set target_relation = this.incorporate(type='table') -%}\n {% if existing_relation is not none and not existing_relation.is_table %}\n {{ log(\"Dropping relation \" ~ existing_relation ~ \" because it is of type \" ~ existing_relation.type) }}\n {{ drop_relation_if_exists(existing_relation) }}\n {% endif %}\n\n -- as a general rule, data platforms that can clone tables can also do atomic 'create or replace'\n {% call statement('main') %}\n {% if target_relation and defer_relation and target_relation == defer_relation %}\n {{ log(\"Target relation and defer relation are the same, skipping clone for relation: \" ~ target_relation) }}\n {% else %}\n {{ create_or_replace_clone(target_relation, defer_relation) }}\n {% endif %}\n\n {% endcall %}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n {% do persist_docs(target_relation, model) %}\n\n {{ return({'relations': [target_relation]}) }}\n\n {%- else -%}\n\n {%- set target_relation = this.incorporate(type='view') -%}\n\n -- reuse the view materialization\n -- TODO: support actual dispatch for materialization macros\n -- Tracking ticket: https://github.com/dbt-labs/dbt-core/issues/7799\n {% set search_name = \"materialization_view_\" ~ adapter.type() %}\n {% if not search_name in context %}\n {% set search_name = \"materialization_view_default\" %}\n {% endif %}\n {% set materialization_macro = context[search_name] %}\n {% set relations = materialization_macro() %}\n {{ return(relations) }}\n\n {%- endif -%}\n\n{%- endmaterialization -%}", + "depends_on": { + "macros": [ + "macro.dbt.load_cached_relation", + "macro.dbt.can_clone_table", + "macro.dbt.drop_relation_if_exists", + "macro.dbt.statement", + "macro.dbt.create_or_replace_clone", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants", + "macro.dbt.persist_docs" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1526177, + "supported_languages": ["sql"] + }, + "macro.dbt.create_or_replace_clone": { + "name": "create_or_replace_clone", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/clone/create_or_replace_clone.sql", + "original_file_path": "macros/materializations/models/clone/create_or_replace_clone.sql", + "unique_id": "macro.dbt.create_or_replace_clone", + "macro_sql": "{% macro create_or_replace_clone(this_relation, defer_relation) %}\n {{ return(adapter.dispatch('create_or_replace_clone', 'dbt')(this_relation, defer_relation)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__create_or_replace_clone"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1531436, + "supported_languages": null + }, + "macro.dbt.default__create_or_replace_clone": { + "name": "default__create_or_replace_clone", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/materializations/models/clone/create_or_replace_clone.sql", + "original_file_path": "macros/materializations/models/clone/create_or_replace_clone.sql", + "unique_id": "macro.dbt.default__create_or_replace_clone", + "macro_sql": "{% macro default__create_or_replace_clone(this_relation, 
defer_relation) %}\n create or replace table {{ this_relation }} clone {{ defer_relation }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1533582, + "supported_languages": null + }, + "macro.dbt.listagg": { + "name": "listagg", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/listagg.sql", + "original_file_path": "macros/utils/listagg.sql", + "unique_id": "macro.dbt.listagg", + "macro_sql": "{% macro listagg(measure, delimiter_text=\"','\", order_by_clause=none, limit_num=none) -%}\n {{ return(adapter.dispatch('listagg', 'dbt') (measure, delimiter_text, order_by_clause, limit_num)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__listagg"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1541204, + "supported_languages": null + }, + "macro.dbt.default__listagg": { + "name": "default__listagg", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/listagg.sql", + "original_file_path": "macros/utils/listagg.sql", + "unique_id": "macro.dbt.default__listagg", + "macro_sql": "{% macro default__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}\n\n {% if limit_num -%}\n array_to_string(\n array_slice(\n array_agg(\n {{ measure }}\n ){% if order_by_clause -%}\n within group ({{ order_by_clause }})\n {%- endif %}\n ,0\n ,{{ limit_num }}\n ),\n {{ delimiter_text }}\n )\n {%- else %}\n listagg(\n {{ measure }},\n {{ delimiter_text }}\n )\n {% if order_by_clause -%}\n within group ({{ order_by_clause }})\n {%- endif %}\n {%- endif %}\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.154758, + "supported_languages": null + }, + "macro.dbt.array_append": { + "name": "array_append", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/array_append.sql", + "original_file_path": "macros/utils/array_append.sql", + "unique_id": "macro.dbt.array_append", + "macro_sql": "{% macro array_append(array, new_element) -%}\n {{ return(adapter.dispatch('array_append', 'dbt')(array, new_element)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__array_append"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.155188, + "supported_languages": null + }, + "macro.dbt.default__array_append": { + "name": "default__array_append", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/array_append.sql", + "original_file_path": "macros/utils/array_append.sql", + "unique_id": "macro.dbt.default__array_append", + "macro_sql": "{% macro default__array_append(array, new_element) -%}\n array_append({{ array }}, {{ new_element }})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.155388, + "supported_languages": null + }, + "macro.dbt.split_part": { + "name": "split_part", + "resource_type": "macro", + "package_name": "dbt", + "path": 
"macros/utils/split_part.sql", + "original_file_path": "macros/utils/split_part.sql", + "unique_id": "macro.dbt.split_part", + "macro_sql": "{% macro split_part(string_text, delimiter_text, part_number) %}\n {{ return(adapter.dispatch('split_part', 'dbt') (string_text, delimiter_text, part_number)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__split_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1560102, + "supported_languages": null + }, + "macro.dbt.default__split_part": { + "name": "default__split_part", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/split_part.sql", + "original_file_path": "macros/utils/split_part.sql", + "unique_id": "macro.dbt.default__split_part", + "macro_sql": "{% macro default__split_part(string_text, delimiter_text, part_number) %}\n\n split_part(\n {{ string_text }},\n {{ delimiter_text }},\n {{ part_number }}\n )\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1562617, + "supported_languages": null + }, + "macro.dbt._split_part_negative": { + "name": "_split_part_negative", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/split_part.sql", + "original_file_path": "macros/utils/split_part.sql", + "unique_id": "macro.dbt._split_part_negative", + "macro_sql": "{% macro _split_part_negative(string_text, delimiter_text, part_number) %}\n\n split_part(\n {{ string_text }},\n {{ delimiter_text }},\n length({{ string_text }})\n - length(\n replace({{ string_text }}, {{ delimiter_text }}, '')\n ) + 2 + {{ part_number }}\n )\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1566188, + "supported_languages": null + }, + "macro.dbt.string_literal": { + "name": "string_literal", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/literal.sql", + "original_file_path": "macros/utils/literal.sql", + "unique_id": "macro.dbt.string_literal", + "macro_sql": "{%- macro string_literal(value) -%}\n {{ return(adapter.dispatch('string_literal', 'dbt') (value)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.default__string_literal"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.156974, + "supported_languages": null + }, + "macro.dbt.default__string_literal": { + "name": "default__string_literal", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/literal.sql", + "original_file_path": "macros/utils/literal.sql", + "unique_id": "macro.dbt.default__string_literal", + "macro_sql": "{% macro default__string_literal(value) -%}\n '{{ value }}'\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.157133, + "supported_languages": null + }, + "macro.dbt.datediff": { + "name": "datediff", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/datediff.sql", + "original_file_path": 
"macros/utils/datediff.sql", + "unique_id": "macro.dbt.datediff", + "macro_sql": "{% macro datediff(first_date, second_date, datepart) %}\n {{ return(adapter.dispatch('datediff', 'dbt')(first_date, second_date, datepart)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__datediff"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.157609, + "supported_languages": null + }, + "macro.dbt.default__datediff": { + "name": "default__datediff", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/datediff.sql", + "original_file_path": "macros/utils/datediff.sql", + "unique_id": "macro.dbt.default__datediff", + "macro_sql": "{% macro default__datediff(first_date, second_date, datepart) -%}\n\n datediff(\n {{ datepart }},\n {{ first_date }},\n {{ second_date }}\n )\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.157855, + "supported_languages": null + }, + "macro.dbt.date_trunc": { + "name": "date_trunc", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/date_trunc.sql", + "original_file_path": "macros/utils/date_trunc.sql", + "unique_id": "macro.dbt.date_trunc", + "macro_sql": "{% macro date_trunc(datepart, date) -%}\n {{ return(adapter.dispatch('date_trunc', 'dbt') (datepart, date)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__date_trunc"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1582332, + "supported_languages": null + }, + "macro.dbt.default__date_trunc": { + "name": "default__date_trunc", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/date_trunc.sql", + "original_file_path": "macros/utils/date_trunc.sql", + "unique_id": "macro.dbt.default__date_trunc", + "macro_sql": "{% macro default__date_trunc(datepart, date) -%}\n date_trunc('{{datepart}}', {{date}})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.158462, + "supported_languages": null + }, + "macro.dbt.any_value": { + "name": "any_value", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/any_value.sql", + "original_file_path": "macros/utils/any_value.sql", + "unique_id": "macro.dbt.any_value", + "macro_sql": "{% macro any_value(expression) -%}\n {{ return(adapter.dispatch('any_value', 'dbt') (expression)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__any_value"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1588387, + "supported_languages": null + }, + "macro.dbt.default__any_value": { + "name": "default__any_value", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/any_value.sql", + "original_file_path": "macros/utils/any_value.sql", + "unique_id": "macro.dbt.default__any_value", + "macro_sql": "{% macro default__any_value(expression) -%}\n\n any_value({{ expression }})\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + 
"meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1592915, + "supported_languages": null + }, + "macro.dbt.bool_or": { + "name": "bool_or", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/bool_or.sql", + "original_file_path": "macros/utils/bool_or.sql", + "unique_id": "macro.dbt.bool_or", + "macro_sql": "{% macro bool_or(expression) -%}\n {{ return(adapter.dispatch('bool_or', 'dbt') (expression)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__bool_or"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1596448, + "supported_languages": null + }, + "macro.dbt.default__bool_or": { + "name": "default__bool_or", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/bool_or.sql", + "original_file_path": "macros/utils/bool_or.sql", + "unique_id": "macro.dbt.default__bool_or", + "macro_sql": "{% macro default__bool_or(expression) -%}\n\n bool_or({{ expression }})\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1597996, + "supported_languages": null + }, + "macro.dbt.dateadd": { + "name": "dateadd", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/dateadd.sql", + "original_file_path": "macros/utils/dateadd.sql", + "unique_id": "macro.dbt.dateadd", + "macro_sql": "{% macro dateadd(datepart, interval, from_date_or_timestamp) %}\n {{ return(adapter.dispatch('dateadd', 'dbt')(datepart, interval, from_date_or_timestamp)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__dateadd"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.160293, + "supported_languages": null + }, + "macro.dbt.default__dateadd": { + "name": "default__dateadd", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/dateadd.sql", + "original_file_path": "macros/utils/dateadd.sql", + "unique_id": "macro.dbt.default__dateadd", + "macro_sql": "{% macro default__dateadd(datepart, interval, from_date_or_timestamp) %}\n\n dateadd(\n {{ datepart }},\n {{ interval }},\n {{ from_date_or_timestamp }}\n )\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1605422, + "supported_languages": null + }, + "macro.dbt.array_concat": { + "name": "array_concat", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/array_concat.sql", + "original_file_path": "macros/utils/array_concat.sql", + "unique_id": "macro.dbt.array_concat", + "macro_sql": "{% macro array_concat(array_1, array_2) -%}\n {{ return(adapter.dispatch('array_concat', 'dbt')(array_1, array_2)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__array_concat"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1609294, + "supported_languages": null + }, + "macro.dbt.default__array_concat": { + "name": "default__array_concat", + "resource_type": "macro", + 
"package_name": "dbt", + "path": "macros/utils/array_concat.sql", + "original_file_path": "macros/utils/array_concat.sql", + "unique_id": "macro.dbt.default__array_concat", + "macro_sql": "{% macro default__array_concat(array_1, array_2) -%}\n array_cat({{ array_1 }}, {{ array_2 }})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.161128, + "supported_languages": null + }, + "macro.dbt.safe_cast": { + "name": "safe_cast", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/safe_cast.sql", + "original_file_path": "macros/utils/safe_cast.sql", + "unique_id": "macro.dbt.safe_cast", + "macro_sql": "{% macro safe_cast(field, type) %}\n {{ return(adapter.dispatch('safe_cast', 'dbt') (field, type)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__safe_cast"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1615262, + "supported_languages": null + }, + "macro.dbt.default__safe_cast": { + "name": "default__safe_cast", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/safe_cast.sql", + "original_file_path": "macros/utils/safe_cast.sql", + "unique_id": "macro.dbt.default__safe_cast", + "macro_sql": "{% macro default__safe_cast(field, type) %}\n {# most databases don't support this function yet\n so we just need to use cast #}\n cast({{field}} as {{type}})\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1617336, + "supported_languages": null + }, + "macro.dbt.last_day": { + "name": "last_day", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/last_day.sql", + "original_file_path": "macros/utils/last_day.sql", + "unique_id": "macro.dbt.last_day", + "macro_sql": "{% macro last_day(date, datepart) %}\n {{ return(adapter.dispatch('last_day', 'dbt') (date, datepart)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__last_day"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1622396, + "supported_languages": null + }, + "macro.dbt.default_last_day": { + "name": "default_last_day", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/last_day.sql", + "original_file_path": "macros/utils/last_day.sql", + "unique_id": "macro.dbt.default_last_day", + "macro_sql": "\n\n{%- macro default_last_day(date, datepart) -%}\n cast(\n {{dbt.dateadd('day', '-1',\n dbt.dateadd(datepart, '1', dbt.date_trunc(datepart, date))\n )}}\n as date)\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.dateadd", "macro.dbt.date_trunc"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1626341, + "supported_languages": null + }, + "macro.dbt.default__last_day": { + "name": "default__last_day", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/last_day.sql", + "original_file_path": "macros/utils/last_day.sql", + "unique_id": "macro.dbt.default__last_day", + "macro_sql": "{% macro 
default__last_day(date, datepart) -%}\n {{dbt.default_last_day(date, datepart)}}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default_last_day"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1628897, + "supported_languages": null + }, + "macro.dbt.get_powers_of_two": { + "name": "get_powers_of_two", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/generate_series.sql", + "original_file_path": "macros/utils/generate_series.sql", + "unique_id": "macro.dbt.get_powers_of_two", + "macro_sql": "{% macro get_powers_of_two(upper_bound) %}\n {{ return(adapter.dispatch('get_powers_of_two', 'dbt')(upper_bound)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_powers_of_two"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.163906, + "supported_languages": null + }, + "macro.dbt.default__get_powers_of_two": { + "name": "default__get_powers_of_two", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/generate_series.sql", + "original_file_path": "macros/utils/generate_series.sql", + "unique_id": "macro.dbt.default__get_powers_of_two", + "macro_sql": "{% macro default__get_powers_of_two(upper_bound) %}\n\n {% if upper_bound <= 0 %}\n {{ exceptions.raise_compiler_error(\"upper bound must be positive\") }}\n {% endif %}\n\n {% for _ in range(1, 100) %}\n {% if upper_bound <= 2 ** loop.index %}{{ return(loop.index) }}{% endif %}\n {% endfor %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.164624, + "supported_languages": null + }, + "macro.dbt.generate_series": { + "name": "generate_series", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/generate_series.sql", + "original_file_path": "macros/utils/generate_series.sql", + "unique_id": "macro.dbt.generate_series", + "macro_sql": "{% macro generate_series(upper_bound) %}\n {{ return(adapter.dispatch('generate_series', 'dbt')(upper_bound)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__generate_series"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1649115, + "supported_languages": null + }, + "macro.dbt.default__generate_series": { + "name": "default__generate_series", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/generate_series.sql", + "original_file_path": "macros/utils/generate_series.sql", + "unique_id": "macro.dbt.default__generate_series", + "macro_sql": "{% macro default__generate_series(upper_bound) %}\n\n {% set n = dbt.get_powers_of_two(upper_bound) %}\n\n with p as (\n select 0 as generated_number union all select 1\n ), unioned as (\n\n select\n\n {% for i in range(n) %}\n p{{i}}.generated_number * power(2, {{i}})\n {% if not loop.last %} + {% endif %}\n {% endfor %}\n + 1\n as generated_number\n\n from\n\n {% for i in range(n) %}\n p as p{{i}}\n {% if not loop.last %} cross join {% endif %}\n {% endfor %}\n\n )\n\n select *\n from unioned\n where generated_number <= {{upper_bound}}\n order by generated_number\n\n{% endmacro %}", + "depends_on": { + "macros": 
["macro.dbt.get_powers_of_two"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1657271, + "supported_languages": null + }, + "macro.dbt.length": { + "name": "length", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/length.sql", + "original_file_path": "macros/utils/length.sql", + "unique_id": "macro.dbt.length", + "macro_sql": "{% macro length(expression) -%}\n {{ return(adapter.dispatch('length', 'dbt') (expression)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__length"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1661098, + "supported_languages": null + }, + "macro.dbt.default__length": { + "name": "default__length", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/length.sql", + "original_file_path": "macros/utils/length.sql", + "unique_id": "macro.dbt.default__length", + "macro_sql": "{% macro default__length(expression) %}\n\n length(\n {{ expression }}\n )\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.16627, + "supported_languages": null + }, + "macro.dbt.concat": { + "name": "concat", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/concat.sql", + "original_file_path": "macros/utils/concat.sql", + "unique_id": "macro.dbt.concat", + "macro_sql": "{% macro concat(fields) -%}\n {{ return(adapter.dispatch('concat', 'dbt')(fields)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__concat"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1666167, + "supported_languages": null + }, + "macro.dbt.default__concat": { + "name": "default__concat", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/concat.sql", + "original_file_path": "macros/utils/concat.sql", + "unique_id": "macro.dbt.default__concat", + "macro_sql": "{% macro default__concat(fields) -%}\n {{ fields|join(' || ') }}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1669059, + "supported_languages": null + }, + "macro.dbt.right": { + "name": "right", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/right.sql", + "original_file_path": "macros/utils/right.sql", + "unique_id": "macro.dbt.right", + "macro_sql": "{% macro right(string_text, length_expression) -%}\n {{ return(adapter.dispatch('right', 'dbt') (string_text, length_expression)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__right"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1673796, + "supported_languages": null + }, + "macro.dbt.default__right": { + "name": "default__right", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/right.sql", + "original_file_path": "macros/utils/right.sql", + "unique_id": "macro.dbt.default__right", + "macro_sql": "{% 
macro default__right(string_text, length_expression) %}\n\n right(\n {{ string_text }},\n {{ length_expression }}\n )\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1676745, + "supported_languages": null + }, + "macro.dbt.escape_single_quotes": { + "name": "escape_single_quotes", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/escape_single_quotes.sql", + "original_file_path": "macros/utils/escape_single_quotes.sql", + "unique_id": "macro.dbt.escape_single_quotes", + "macro_sql": "{% macro escape_single_quotes(expression) %}\n {{ return(adapter.dispatch('escape_single_quotes', 'dbt') (expression)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__escape_single_quotes"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1681378, + "supported_languages": null + }, + "macro.dbt.default__escape_single_quotes": { + "name": "default__escape_single_quotes", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/escape_single_quotes.sql", + "original_file_path": "macros/utils/escape_single_quotes.sql", + "unique_id": "macro.dbt.default__escape_single_quotes", + "macro_sql": "{% macro default__escape_single_quotes(expression) -%}\n{{ expression | replace(\"'\",\"''\") }}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1683912, + "supported_languages": null + }, + "macro.dbt.position": { + "name": "position", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/position.sql", + "original_file_path": "macros/utils/position.sql", + "unique_id": "macro.dbt.position", + "macro_sql": "{% macro position(substring_text, string_text) -%}\n {{ return(adapter.dispatch('position', 'dbt') (substring_text, string_text)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__position"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1689882, + "supported_languages": null + }, + "macro.dbt.default__position": { + "name": "default__position", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/position.sql", + "original_file_path": "macros/utils/position.sql", + "unique_id": "macro.dbt.default__position", + "macro_sql": "{% macro default__position(substring_text, string_text) %}\n\n position(\n {{ substring_text }} in {{ string_text }}\n )\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1692753, + "supported_languages": null + }, + "macro.dbt.replace": { + "name": "replace", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/replace.sql", + "original_file_path": "macros/utils/replace.sql", + "unique_id": "macro.dbt.replace", + "macro_sql": "{% macro replace(field, old_chars, new_chars) -%}\n {{ return(adapter.dispatch('replace', 'dbt') (field, old_chars, new_chars)) }}\n{% endmacro %}", + "depends_on": { + "macros": 
["macro.dbt.default__replace"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.169902, + "supported_languages": null + }, + "macro.dbt.default__replace": { + "name": "default__replace", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/replace.sql", + "original_file_path": "macros/utils/replace.sql", + "unique_id": "macro.dbt.default__replace", + "macro_sql": "{% macro default__replace(field, old_chars, new_chars) %}\n\n replace(\n {{ field }},\n {{ old_chars }},\n {{ new_chars }}\n )\n\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1702304, + "supported_languages": null + }, + "macro.dbt.get_intervals_between": { + "name": "get_intervals_between", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/date_spine.sql", + "original_file_path": "macros/utils/date_spine.sql", + "unique_id": "macro.dbt.get_intervals_between", + "macro_sql": "{% macro get_intervals_between(start_date, end_date, datepart) -%}\n {{ return(adapter.dispatch('get_intervals_between', 'dbt')(start_date, end_date, datepart)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_intervals_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1715682, + "supported_languages": null + }, + "macro.dbt.default__get_intervals_between": { + "name": "default__get_intervals_between", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/date_spine.sql", + "original_file_path": "macros/utils/date_spine.sql", + "unique_id": "macro.dbt.default__get_intervals_between", + "macro_sql": "{% macro default__get_intervals_between(start_date, end_date, datepart) -%}\n {%- call statement('get_intervals_between', fetch_result=True) %}\n\n select {{ dbt.datediff(start_date, end_date, datepart) }}\n\n {%- endcall -%}\n\n {%- set value_list = load_result('get_intervals_between') -%}\n\n {%- if value_list and value_list['data'] -%}\n {%- set values = value_list['data'] | map(attribute=0) | list %}\n {{ return(values[0]) }}\n {%- else -%}\n {{ return(1) }}\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement", "macro.dbt.datediff"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1726625, + "supported_languages": null + }, + "macro.dbt.date_spine": { + "name": "date_spine", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/date_spine.sql", + "original_file_path": "macros/utils/date_spine.sql", + "unique_id": "macro.dbt.date_spine", + "macro_sql": "{% macro date_spine(datepart, start_date, end_date) %}\n {{ return(adapter.dispatch('date_spine', 'dbt')(datepart, start_date, end_date)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__date_spine"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1731026, + "supported_languages": null + }, + "macro.dbt.default__date_spine": { + "name": "default__date_spine", + "resource_type": "macro", + "package_name": 
"dbt", + "path": "macros/utils/date_spine.sql", + "original_file_path": "macros/utils/date_spine.sql", + "unique_id": "macro.dbt.default__date_spine", + "macro_sql": "{% macro default__date_spine(datepart, start_date, end_date) %}\n\n\n {# call as follows:\n\n date_spine(\n \"day\",\n \"to_date('01/01/2016', 'mm/dd/yyyy')\",\n \"dbt.dateadd(week, 1, current_date)\"\n ) #}\n\n\n with rawdata as (\n\n {{dbt.generate_series(\n dbt.get_intervals_between(start_date, end_date, datepart)\n )}}\n\n ),\n\n all_periods as (\n\n select (\n {{\n dbt.dateadd(\n datepart,\n \"row_number() over (order by 1) - 1\",\n start_date\n )\n }}\n ) as date_{{datepart}}\n from rawdata\n\n ),\n\n filtered as (\n\n select *\n from all_periods\n where date_{{datepart}} <= {{ end_date }}\n\n )\n\n select * from filtered\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.generate_series", + "macro.dbt.get_intervals_between", + "macro.dbt.dateadd" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.173668, + "supported_languages": null + }, + "macro.dbt.cast_bool_to_text": { + "name": "cast_bool_to_text", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/cast_bool_to_text.sql", + "original_file_path": "macros/utils/cast_bool_to_text.sql", + "unique_id": "macro.dbt.cast_bool_to_text", + "macro_sql": "{% macro cast_bool_to_text(field) %}\n {{ adapter.dispatch('cast_bool_to_text', 'dbt') (field) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__cast_bool_to_text"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.174192, + "supported_languages": null + }, + "macro.dbt.default__cast_bool_to_text": { + "name": "default__cast_bool_to_text", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/cast_bool_to_text.sql", + "original_file_path": "macros/utils/cast_bool_to_text.sql", + "unique_id": "macro.dbt.default__cast_bool_to_text", + "macro_sql": "{% macro default__cast_bool_to_text(field) %}\n cast({{ field }} as {{ api.Column.translate_type('string') }})\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1744945, + "supported_languages": null + }, + "macro.dbt.type_string": { + "name": "type_string", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.type_string", + "macro_sql": "\n\n{%- macro type_string() -%}\n {{ return(adapter.dispatch('type_string', 'dbt')()) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.default__type_string"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1759398, + "supported_languages": null + }, + "macro.dbt.default__type_string": { + "name": "default__type_string", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.default__type_string", + "macro_sql": "{% macro default__type_string() %}\n {{ return(api.Column.translate_type(\"string\")) }}\n{% endmacro 
%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1762211, + "supported_languages": null + }, + "macro.dbt.type_timestamp": { + "name": "type_timestamp", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.type_timestamp", + "macro_sql": "\n\n{%- macro type_timestamp() -%}\n {{ return(adapter.dispatch('type_timestamp', 'dbt')()) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.default__type_timestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1764665, + "supported_languages": null + }, + "macro.dbt.default__type_timestamp": { + "name": "default__type_timestamp", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.default__type_timestamp", + "macro_sql": "{% macro default__type_timestamp() %}\n {{ return(api.Column.translate_type(\"timestamp\")) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1768017, + "supported_languages": null + }, + "macro.dbt.type_float": { + "name": "type_float", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.type_float", + "macro_sql": "\n\n{%- macro type_float() -%}\n {{ return(adapter.dispatch('type_float', 'dbt')()) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.default__type_float"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1770585, + "supported_languages": null + }, + "macro.dbt.default__type_float": { + "name": "default__type_float", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.default__type_float", + "macro_sql": "{% macro default__type_float() %}\n {{ return(api.Column.translate_type(\"float\")) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1772861, + "supported_languages": null + }, + "macro.dbt.type_numeric": { + "name": "type_numeric", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.type_numeric", + "macro_sql": "\n\n{%- macro type_numeric() -%}\n {{ return(adapter.dispatch('type_numeric', 'dbt')()) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.default__type_numeric"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1776009, + "supported_languages": null + }, + "macro.dbt.default__type_numeric": { + "name": "default__type_numeric", + 
"resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.default__type_numeric", + "macro_sql": "{% macro default__type_numeric() %}\n {{ return(api.Column.numeric_type(\"numeric\", 28, 6)) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1778865, + "supported_languages": null + }, + "macro.dbt.type_bigint": { + "name": "type_bigint", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.type_bigint", + "macro_sql": "\n\n{%- macro type_bigint() -%}\n {{ return(adapter.dispatch('type_bigint', 'dbt')()) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.default__type_bigint"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1781456, + "supported_languages": null + }, + "macro.dbt.default__type_bigint": { + "name": "default__type_bigint", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.default__type_bigint", + "macro_sql": "{% macro default__type_bigint() %}\n {{ return(api.Column.translate_type(\"bigint\")) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1783795, + "supported_languages": null + }, + "macro.dbt.type_int": { + "name": "type_int", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.type_int", + "macro_sql": "\n\n{%- macro type_int() -%}\n {{ return(adapter.dispatch('type_int', 'dbt')()) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.default__type_int"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1786263, + "supported_languages": null + }, + "macro.dbt.default__type_int": { + "name": "default__type_int", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.default__type_int", + "macro_sql": "{%- macro default__type_int() -%}\n {{ return(api.Column.translate_type(\"integer\")) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.178873, + "supported_languages": null + }, + "macro.dbt.type_boolean": { + "name": "type_boolean", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.type_boolean", + "macro_sql": "\n\n{%- macro type_boolean() -%}\n {{ return(adapter.dispatch('type_boolean', 'dbt')()) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.default__type_boolean"] + }, + 
"description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1791332, + "supported_languages": null + }, + "macro.dbt.default__type_boolean": { + "name": "default__type_boolean", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/data_types.sql", + "original_file_path": "macros/utils/data_types.sql", + "unique_id": "macro.dbt.default__type_boolean", + "macro_sql": "{%- macro default__type_boolean() -%}\n {{ return(api.Column.translate_type(\"boolean\")) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1794696, + "supported_languages": null + }, + "macro.dbt.intersect": { + "name": "intersect", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/intersect.sql", + "original_file_path": "macros/utils/intersect.sql", + "unique_id": "macro.dbt.intersect", + "macro_sql": "{% macro intersect() %}\n {{ return(adapter.dispatch('intersect', 'dbt')()) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__intersect"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1798964, + "supported_languages": null + }, + "macro.dbt.default__intersect": { + "name": "default__intersect", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/intersect.sql", + "original_file_path": "macros/utils/intersect.sql", + "unique_id": "macro.dbt.default__intersect", + "macro_sql": "{% macro default__intersect() %}\n\n intersect\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1800406, + "supported_languages": null + }, + "macro.dbt.array_construct": { + "name": "array_construct", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/array_construct.sql", + "original_file_path": "macros/utils/array_construct.sql", + "unique_id": "macro.dbt.array_construct", + "macro_sql": "{% macro array_construct(inputs=[], data_type=api.Column.translate_type('integer')) -%}\n {{ return(adapter.dispatch('array_construct', 'dbt')(inputs, data_type)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__array_construct"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1807265, + "supported_languages": null + }, + "macro.dbt.default__array_construct": { + "name": "default__array_construct", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/array_construct.sql", + "original_file_path": "macros/utils/array_construct.sql", + "unique_id": "macro.dbt.default__array_construct", + "macro_sql": "{% macro default__array_construct(inputs, data_type) -%}\n {% if inputs|length > 0 %}\n array[ {{ inputs|join(' , ') }} ]\n {% else %}\n array[]::{{data_type}}[]\n {% endif %}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1812296, + "supported_languages": null + }, + "macro.dbt.except": { + 
"name": "except", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/except.sql", + "original_file_path": "macros/utils/except.sql", + "unique_id": "macro.dbt.except", + "macro_sql": "{% macro except() %}\n {{ return(adapter.dispatch('except', 'dbt')()) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__except"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.181649, + "supported_languages": null + }, + "macro.dbt.default__except": { + "name": "default__except", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/except.sql", + "original_file_path": "macros/utils/except.sql", + "unique_id": "macro.dbt.default__except", + "macro_sql": "{% macro default__except() %}\n\n except\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.181772, + "supported_languages": null + }, + "macro.dbt.hash": { + "name": "hash", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/hash.sql", + "original_file_path": "macros/utils/hash.sql", + "unique_id": "macro.dbt.hash", + "macro_sql": "{% macro hash(field) -%}\n {{ return(adapter.dispatch('hash', 'dbt') (field)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.my_nesso_project.default__hash"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1821587, + "supported_languages": null + }, + "macro.dbt.default__hash": { + "name": "default__hash", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/utils/hash.sql", + "original_file_path": "macros/utils/hash.sql", + "unique_id": "macro.dbt.default__hash", + "macro_sql": "{% macro default__hash(field) -%}\n md5(cast({{ field }} as {{ api.Column.translate_type('string') }}))\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1824245, + "supported_languages": null + }, + "macro.dbt.get_rename_sql": { + "name": "get_rename_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/rename.sql", + "original_file_path": "macros/relations/rename.sql", + "unique_id": "macro.dbt.get_rename_sql", + "macro_sql": "{%- macro get_rename_sql(relation, new_name) -%}\n {{- log('Applying RENAME to: ' ~ relation) -}}\n {{- adapter.dispatch('get_rename_sql', 'dbt')(relation, new_name) -}}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": ["macro.dbt.default__get_rename_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1835792, + "supported_languages": null + }, + "macro.dbt.default__get_rename_sql": { + "name": "default__get_rename_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/rename.sql", + "original_file_path": "macros/relations/rename.sql", + "unique_id": "macro.dbt.default__get_rename_sql", + "macro_sql": "{%- macro default__get_rename_sql(relation, new_name) -%}\n\n {%- if relation.is_view -%}\n {{ get_rename_view_sql(relation, new_name) }}\n\n {%- elif relation.is_table -%}\n 
{{ get_rename_table_sql(relation, new_name) }}\n\n {%- elif relation.is_materialized_view -%}\n {{ get_rename_materialized_view_sql(relation, new_name) }}\n\n {%- else -%}\n {{- exceptions.raise_compiler_error(\"`get_rename_sql` has not been implemented for: \" ~ relation.type ) -}}\n\n {%- endif -%}\n\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [ + "macro.dbt.get_rename_view_sql", + "macro.dbt.get_rename_table_sql", + "macro.dbt.get_rename_materialized_view_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1845474, + "supported_languages": null + }, + "macro.dbt.rename_relation": { + "name": "rename_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/rename.sql", + "original_file_path": "macros/relations/rename.sql", + "unique_id": "macro.dbt.rename_relation", + "macro_sql": "{% macro rename_relation(from_relation, to_relation) -%}\n {{ return(adapter.dispatch('rename_relation', 'dbt')(from_relation, to_relation)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__rename_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1849978, + "supported_languages": null + }, + "macro.dbt.default__rename_relation": { + "name": "default__rename_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/rename.sql", + "original_file_path": "macros/relations/rename.sql", + "unique_id": "macro.dbt.default__rename_relation", + "macro_sql": "{% macro default__rename_relation(from_relation, to_relation) -%}\n {% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}\n {% call statement('rename_relation') -%}\n alter table {{ from_relation }} rename to {{ target_name }}\n {%- endcall %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1855528, + "supported_languages": null + }, + "macro.dbt.get_create_intermediate_sql": { + "name": "get_create_intermediate_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/create_intermediate.sql", + "original_file_path": "macros/relations/create_intermediate.sql", + "unique_id": "macro.dbt.get_create_intermediate_sql", + "macro_sql": "{%- macro get_create_intermediate_sql(relation, sql) -%}\n {{- log('Applying CREATE INTERMEDIATE to: ' ~ relation) -}}\n {{- adapter.dispatch('get_create_intermediate_sql', 'dbt')(relation, sql) -}}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": ["macro.dbt.default__get_create_intermediate_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1863039, + "supported_languages": null + }, + "macro.dbt.default__get_create_intermediate_sql": { + "name": "default__get_create_intermediate_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/create_intermediate.sql", + "original_file_path": "macros/relations/create_intermediate.sql", + "unique_id": "macro.dbt.default__get_create_intermediate_sql", + "macro_sql": "{%- macro default__get_create_intermediate_sql(relation, sql) -%}\n\n -- get the standard 
intermediate name\n {% set intermediate_relation = make_intermediate_relation(relation) %}\n\n -- drop any pre-existing intermediate\n {{ get_drop_sql(intermediate_relation) }};\n\n {{ get_create_sql(intermediate_relation, sql) }}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt.make_intermediate_relation", + "macro.dbt.get_drop_sql", + "macro.dbt.get_create_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1868281, + "supported_languages": null + }, + "macro.dbt.get_drop_sql": { + "name": "get_drop_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/drop.sql", + "original_file_path": "macros/relations/drop.sql", + "unique_id": "macro.dbt.get_drop_sql", + "macro_sql": "{%- macro get_drop_sql(relation) -%}\n {{- log('Applying DROP to: ' ~ relation) -}}\n {{- adapter.dispatch('get_drop_sql', 'dbt')(relation) -}}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": ["macro.dbt.default__get_drop_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1878767, + "supported_languages": null + }, + "macro.dbt.default__get_drop_sql": { + "name": "default__get_drop_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/drop.sql", + "original_file_path": "macros/relations/drop.sql", + "unique_id": "macro.dbt.default__get_drop_sql", + "macro_sql": "{%- macro default__get_drop_sql(relation) -%}\n\n {%- if relation.is_view -%}\n {{ drop_view(relation) }}\n\n {%- elif relation.is_table -%}\n {{ drop_table(relation) }}\n\n {%- elif relation.is_materialized_view -%}\n {{ drop_materialized_view(relation) }}\n\n {%- else -%}\n drop {{ relation.type }} if exists {{ relation }} cascade\n\n {%- endif -%}\n\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [ + "macro.dbt.drop_view", + "macro.dbt.drop_table", + "macro.dbt.drop_materialized_view" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1885355, + "supported_languages": null + }, + "macro.dbt.drop_relation": { + "name": "drop_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/drop.sql", + "original_file_path": "macros/relations/drop.sql", + "unique_id": "macro.dbt.drop_relation", + "macro_sql": "{% macro drop_relation(relation) -%}\n {{ return(adapter.dispatch('drop_relation', 'dbt')(relation)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__drop_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1888561, + "supported_languages": null + }, + "macro.dbt.default__drop_relation": { + "name": "default__drop_relation", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/drop.sql", + "original_file_path": "macros/relations/drop.sql", + "unique_id": "macro.dbt.default__drop_relation", + "macro_sql": "{% macro default__drop_relation(relation) -%}\n {% call statement('drop_relation', auto_begin=False) -%}\n {{ get_drop_sql(relation) }}\n {%- endcall %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement", "macro.dbt.get_drop_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": 
true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1891785, + "supported_languages": null + }, + "macro.dbt.drop_relation_if_exists": { + "name": "drop_relation_if_exists", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/drop.sql", + "original_file_path": "macros/relations/drop.sql", + "unique_id": "macro.dbt.drop_relation_if_exists", + "macro_sql": "{% macro drop_relation_if_exists(relation) %}\n {% if relation is not none %}\n {{ adapter.drop_relation(relation) }}\n {% endif %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1894855, + "supported_languages": null + }, + "macro.dbt.get_replace_sql": { + "name": "get_replace_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/replace.sql", + "original_file_path": "macros/relations/replace.sql", + "unique_id": "macro.dbt.get_replace_sql", + "macro_sql": "{% macro get_replace_sql(existing_relation, target_relation, sql) %}\n {{- log('Applying REPLACE to: ' ~ existing_relation) -}}\n {{- adapter.dispatch('get_replace_sql', 'dbt')(existing_relation, target_relation, sql) -}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_replace_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1905057, + "supported_languages": null + }, + "macro.dbt.default__get_replace_sql": { + "name": "default__get_replace_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/replace.sql", + "original_file_path": "macros/relations/replace.sql", + "unique_id": "macro.dbt.default__get_replace_sql", + "macro_sql": "{% macro default__get_replace_sql(existing_relation, target_relation, sql) %}\n\n {# /* use a create or replace statement if possible */ #}\n\n {% set is_replaceable = existing_relation.type == target_relation_type and existing_relation.can_be_replaced %}\n\n {% if is_replaceable and existing_relation.is_view %}\n {{ get_replace_view_sql(target_relation, sql) }}\n\n {% elif is_replaceable and existing_relation.is_table %}\n {{ get_replace_table_sql(target_relation, sql) }}\n\n {% elif is_replaceable and existing_relation.is_materialized_view %}\n {{ get_replace_materialized_view_sql(target_relation, sql) }}\n\n {# /* a create or replace statement is not possible, so try to stage and/or backup to be safe */ #}\n\n {# /* create target_relation as an intermediate relation, then swap it out with the existing one using a backup */ #}\n {%- elif target_relation.can_be_renamed and existing_relation.can_be_renamed -%}\n {{ get_create_intermediate_sql(target_relation, sql) }};\n {{ get_create_backup_sql(existing_relation) }};\n {{ get_rename_intermediate_sql(target_relation) }};\n {{ get_drop_backup_sql(existing_relation) }}\n\n {# /* create target_relation as an intermediate relation, then swap it out with the existing one without using a backup */ #}\n {%- elif target_relation.can_be_renamed -%}\n {{ get_create_intermediate_sql(target_relation, sql) }};\n {{ get_drop_sql(existing_relation) }};\n {{ get_rename_intermediate_sql(target_relation) }}\n\n {# /* create target_relation in place by first backing up the existing relation */ #}\n {%- elif existing_relation.can_be_renamed -%}\n {{ 
get_create_backup_sql(existing_relation) }};\n {{ get_create_sql(target_relation, sql) }};\n {{ get_drop_backup_sql(existing_relation) }}\n\n {# /* no renaming is allowed, so just drop and create */ #}\n {%- else -%}\n {{ get_drop_sql(existing_relation) }};\n {{ get_create_sql(target_relation, sql) }}\n\n {%- endif -%}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.get_replace_view_sql", + "macro.dbt.get_replace_table_sql", + "macro.dbt.get_replace_materialized_view_sql", + "macro.dbt.get_create_intermediate_sql", + "macro.dbt.get_create_backup_sql", + "macro.dbt.get_rename_intermediate_sql", + "macro.dbt.get_drop_backup_sql", + "macro.dbt.get_drop_sql", + "macro.dbt.get_create_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1923814, + "supported_languages": null + }, + "macro.dbt.get_rename_intermediate_sql": { + "name": "get_rename_intermediate_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/rename_intermediate.sql", + "original_file_path": "macros/relations/rename_intermediate.sql", + "unique_id": "macro.dbt.get_rename_intermediate_sql", + "macro_sql": "{%- macro get_rename_intermediate_sql(relation) -%}\n {{- log('Applying RENAME INTERMEDIATE to: ' ~ relation) -}}\n {{- adapter.dispatch('get_rename_intermediate_sql', 'dbt')(relation) -}}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": ["macro.dbt.default__get_rename_intermediate_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.192882, + "supported_languages": null + }, + "macro.dbt.default__get_rename_intermediate_sql": { + "name": "default__get_rename_intermediate_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/rename_intermediate.sql", + "original_file_path": "macros/relations/rename_intermediate.sql", + "unique_id": "macro.dbt.default__get_rename_intermediate_sql", + "macro_sql": "{%- macro default__get_rename_intermediate_sql(relation) -%}\n\n -- get the standard intermediate name\n {% set intermediate_relation = make_intermediate_relation(relation) %}\n\n {{ get_rename_sql(intermediate_relation, relation.identifier) }}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt.make_intermediate_relation", + "macro.dbt.get_rename_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1932275, + "supported_languages": null + }, + "macro.dbt.get_create_sql": { + "name": "get_create_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/create.sql", + "original_file_path": "macros/relations/create.sql", + "unique_id": "macro.dbt.get_create_sql", + "macro_sql": "{%- macro get_create_sql(relation, sql) -%}\n {{- log('Applying CREATE to: ' ~ relation) -}}\n {{- adapter.dispatch('get_create_sql', 'dbt')(relation, sql) -}}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": ["macro.dbt.default__get_create_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1938422, + "supported_languages": null + }, + "macro.dbt.default__get_create_sql": { + "name": "default__get_create_sql", + "resource_type": "macro", + "package_name": "dbt", + 
"path": "macros/relations/create.sql", + "original_file_path": "macros/relations/create.sql", + "unique_id": "macro.dbt.default__get_create_sql", + "macro_sql": "{%- macro default__get_create_sql(relation, sql) -%}\n\n {%- if relation.is_view -%}\n {{ get_create_view_as_sql(relation, sql) }}\n\n {%- elif relation.is_table -%}\n {{ get_create_table_as_sql(False, relation, sql) }}\n\n {%- elif relation.is_materialized_view -%}\n {{ get_create_materialized_view_as_sql(relation, sql) }}\n\n {%- else -%}\n {{- exceptions.raise_compiler_error(\"`get_create_sql` has not been implemented for: \" ~ relation.type ) -}}\n\n {%- endif -%}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt.get_create_view_as_sql", + "macro.dbt.get_create_table_as_sql", + "macro.dbt.get_create_materialized_view_as_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1945724, + "supported_languages": null + }, + "macro.dbt.get_create_backup_sql": { + "name": "get_create_backup_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/create_backup.sql", + "original_file_path": "macros/relations/create_backup.sql", + "unique_id": "macro.dbt.get_create_backup_sql", + "macro_sql": "{%- macro get_create_backup_sql(relation) -%}\n {{- log('Applying CREATE BACKUP to: ' ~ relation) -}}\n {{- adapter.dispatch('get_create_backup_sql', 'dbt')(relation) -}}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": ["macro.dbt.default__get_create_backup_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1950872, + "supported_languages": null + }, + "macro.dbt.default__get_create_backup_sql": { + "name": "default__get_create_backup_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/create_backup.sql", + "original_file_path": "macros/relations/create_backup.sql", + "unique_id": "macro.dbt.default__get_create_backup_sql", + "macro_sql": "{%- macro default__get_create_backup_sql(relation) -%}\n\n -- get the standard backup name\n {% set backup_relation = make_backup_relation(relation, relation.type) %}\n\n -- drop any pre-existing backup\n {{ get_drop_sql(backup_relation) }};\n\n {{ get_rename_sql(relation, backup_relation.identifier) }}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt.make_backup_relation", + "macro.dbt.get_drop_sql", + "macro.dbt.get_rename_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.195489, + "supported_languages": null + }, + "macro.dbt.get_drop_backup_sql": { + "name": "get_drop_backup_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/drop_backup.sql", + "original_file_path": "macros/relations/drop_backup.sql", + "unique_id": "macro.dbt.get_drop_backup_sql", + "macro_sql": "{%- macro get_drop_backup_sql(relation) -%}\n {{- log('Applying DROP BACKUP to: ' ~ relation) -}}\n {{- adapter.dispatch('get_drop_backup_sql', 'dbt')(relation) -}}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": ["macro.dbt.default__get_drop_backup_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1959343, + "supported_languages": 
null + }, + "macro.dbt.default__get_drop_backup_sql": { + "name": "default__get_drop_backup_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/drop_backup.sql", + "original_file_path": "macros/relations/drop_backup.sql", + "unique_id": "macro.dbt.default__get_drop_backup_sql", + "macro_sql": "{%- macro default__get_drop_backup_sql(relation) -%}\n\n -- get the standard backup name\n {% set backup_relation = make_backup_relation(relation, relation.type) %}\n\n {{ get_drop_sql(backup_relation) }}\n\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt.make_backup_relation", "macro.dbt.get_drop_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1962416, + "supported_languages": null + }, + "macro.dbt.get_rename_materialized_view_sql": { + "name": "get_rename_materialized_view_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/rename.sql", + "original_file_path": "macros/relations/materialized_view/rename.sql", + "unique_id": "macro.dbt.get_rename_materialized_view_sql", + "macro_sql": "{% macro get_rename_materialized_view_sql(relation, new_name) %}\n {{- adapter.dispatch('get_rename_materialized_view_sql', 'dbt')(relation, new_name) -}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_rename_materialized_view_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1966562, + "supported_languages": null + }, + "macro.dbt.default__get_rename_materialized_view_sql": { + "name": "default__get_rename_materialized_view_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/rename.sql", + "original_file_path": "macros/relations/materialized_view/rename.sql", + "unique_id": "macro.dbt.default__get_rename_materialized_view_sql", + "macro_sql": "{% macro default__get_rename_materialized_view_sql(relation, new_name) %}\n {{ exceptions.raise_compiler_error(\n \"`get_rename_materialized_view_sql` has not been implemented for this adapter.\"\n ) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1970127, + "supported_languages": null + }, + "macro.dbt.refresh_materialized_view": { + "name": "refresh_materialized_view", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/refresh.sql", + "original_file_path": "macros/relations/materialized_view/refresh.sql", + "unique_id": "macro.dbt.refresh_materialized_view", + "macro_sql": "{% macro refresh_materialized_view(relation) %}\n {{- log('Applying REFRESH to: ' ~ relation) -}}\n {{- adapter.dispatch('refresh_materialized_view', 'dbt')(relation) -}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__refresh_materialized_view"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.197473, + "supported_languages": null + }, + "macro.dbt.default__refresh_materialized_view": { + "name": "default__refresh_materialized_view", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/refresh.sql", + 
"original_file_path": "macros/relations/materialized_view/refresh.sql", + "unique_id": "macro.dbt.default__refresh_materialized_view", + "macro_sql": "{% macro default__refresh_materialized_view(relation) %}\n {{ exceptions.raise_compiler_error(\"`refresh_materialized_view` has not been implemented for this adapter.\") }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.197676, + "supported_languages": null + }, + "macro.dbt.drop_materialized_view": { + "name": "drop_materialized_view", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/drop.sql", + "original_file_path": "macros/relations/materialized_view/drop.sql", + "unique_id": "macro.dbt.drop_materialized_view", + "macro_sql": "{% macro drop_materialized_view(relation) -%}\n {{ return(adapter.dispatch('drop_materialized_view', 'dbt')(relation)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__drop_materialized_view"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1980803, + "supported_languages": null + }, + "macro.dbt.default__drop_materialized_view": { + "name": "default__drop_materialized_view", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/drop.sql", + "original_file_path": "macros/relations/materialized_view/drop.sql", + "unique_id": "macro.dbt.default__drop_materialized_view", + "macro_sql": "{% macro default__drop_materialized_view(relation) -%}\n drop materialized view if exists {{ relation }} cascade\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1982467, + "supported_languages": null + }, + "macro.dbt.get_replace_materialized_view_sql": { + "name": "get_replace_materialized_view_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/replace.sql", + "original_file_path": "macros/relations/materialized_view/replace.sql", + "unique_id": "macro.dbt.get_replace_materialized_view_sql", + "macro_sql": "{% macro get_replace_materialized_view_sql(relation, sql) %}\n {{- adapter.dispatch('get_replace_materialized_view_sql', 'dbt')(relation, sql) -}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_replace_materialized_view_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1986783, + "supported_languages": null + }, + "macro.dbt.default__get_replace_materialized_view_sql": { + "name": "default__get_replace_materialized_view_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/replace.sql", + "original_file_path": "macros/relations/materialized_view/replace.sql", + "unique_id": "macro.dbt.default__get_replace_materialized_view_sql", + "macro_sql": "{% macro default__get_replace_materialized_view_sql(relation, sql) %}\n {{ exceptions.raise_compiler_error(\n \"`get_replace_materialized_view_sql` has not been implemented for this adapter.\"\n ) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + 
"docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1989007, + "supported_languages": null + }, + "macro.dbt.get_alter_materialized_view_as_sql": { + "name": "get_alter_materialized_view_as_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/alter.sql", + "original_file_path": "macros/relations/materialized_view/alter.sql", + "unique_id": "macro.dbt.get_alter_materialized_view_as_sql", + "macro_sql": "{% macro get_alter_materialized_view_as_sql(\n relation,\n configuration_changes,\n sql,\n existing_relation,\n backup_relation,\n intermediate_relation\n) %}\n {{- log('Applying ALTER to: ' ~ relation) -}}\n {{- adapter.dispatch('get_alter_materialized_view_as_sql', 'dbt')(\n relation,\n configuration_changes,\n sql,\n existing_relation,\n backup_relation,\n intermediate_relation\n ) -}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_alter_materialized_view_as_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.1997306, + "supported_languages": null + }, + "macro.dbt.default__get_alter_materialized_view_as_sql": { + "name": "default__get_alter_materialized_view_as_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/alter.sql", + "original_file_path": "macros/relations/materialized_view/alter.sql", + "unique_id": "macro.dbt.default__get_alter_materialized_view_as_sql", + "macro_sql": "{% macro default__get_alter_materialized_view_as_sql(\n relation,\n configuration_changes,\n sql,\n existing_relation,\n backup_relation,\n intermediate_relation\n) %}\n {{ exceptions.raise_compiler_error(\"Materialized views have not been implemented for this adapter.\") }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.200049, + "supported_languages": null + }, + "macro.dbt.get_materialized_view_configuration_changes": { + "name": "get_materialized_view_configuration_changes", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/alter.sql", + "original_file_path": "macros/relations/materialized_view/alter.sql", + "unique_id": "macro.dbt.get_materialized_view_configuration_changes", + "macro_sql": "{% macro get_materialized_view_configuration_changes(existing_relation, new_config) %}\n /* {#\n It's recommended that configuration changes be formatted as follows:\n {\"\": [{\"action\": \"\", \"context\": ...}]}\n\n For example:\n {\n \"indexes\": [\n {\"action\": \"drop\", \"context\": \"index_abc\"},\n {\"action\": \"create\", \"context\": {\"columns\": [\"column_1\", \"column_2\"], \"type\": \"hash\", \"unique\": True}},\n ],\n }\n\n Either way, `get_materialized_view_configuration_changes` needs to align with `get_alter_materialized_view_as_sql`.\n #} */\n {{- log('Determining configuration changes on: ' ~ existing_relation) -}}\n {%- do return(adapter.dispatch('get_materialized_view_configuration_changes', 'dbt')(existing_relation, new_config)) -%}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.default__get_materialized_view_configuration_changes" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + 
"arguments": [], + "created_at": 1711458072.2005033, + "supported_languages": null + }, + "macro.dbt.default__get_materialized_view_configuration_changes": { + "name": "default__get_materialized_view_configuration_changes", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/alter.sql", + "original_file_path": "macros/relations/materialized_view/alter.sql", + "unique_id": "macro.dbt.default__get_materialized_view_configuration_changes", + "macro_sql": "{% macro default__get_materialized_view_configuration_changes(existing_relation, new_config) %}\n {{ exceptions.raise_compiler_error(\"Materialized views have not been implemented for this adapter.\") }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2007234, + "supported_languages": null + }, + "macro.dbt.get_create_materialized_view_as_sql": { + "name": "get_create_materialized_view_as_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/create.sql", + "original_file_path": "macros/relations/materialized_view/create.sql", + "unique_id": "macro.dbt.get_create_materialized_view_as_sql", + "macro_sql": "{% macro get_create_materialized_view_as_sql(relation, sql) -%}\n {{- adapter.dispatch('get_create_materialized_view_as_sql', 'dbt')(relation, sql) -}}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_create_materialized_view_as_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2011094, + "supported_languages": null + }, + "macro.dbt.default__get_create_materialized_view_as_sql": { + "name": "default__get_create_materialized_view_as_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/materialized_view/create.sql", + "original_file_path": "macros/relations/materialized_view/create.sql", + "unique_id": "macro.dbt.default__get_create_materialized_view_as_sql", + "macro_sql": "{% macro default__get_create_materialized_view_as_sql(relation, sql) -%}\n {{ exceptions.raise_compiler_error(\n \"`get_create_materialized_view_as_sql` has not been implemented for this adapter.\"\n ) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.201323, + "supported_languages": null + }, + "macro.dbt.get_table_columns_and_constraints": { + "name": "get_table_columns_and_constraints", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/column/columns_spec_ddl.sql", + "original_file_path": "macros/relations/column/columns_spec_ddl.sql", + "unique_id": "macro.dbt.get_table_columns_and_constraints", + "macro_sql": "{%- macro get_table_columns_and_constraints() -%}\n {{ adapter.dispatch('get_table_columns_and_constraints', 'dbt')() }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.default__get_table_columns_and_constraints"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2025435, + "supported_languages": null + }, + "macro.dbt.default__get_table_columns_and_constraints": { + "name": 
"default__get_table_columns_and_constraints", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/column/columns_spec_ddl.sql", + "original_file_path": "macros/relations/column/columns_spec_ddl.sql", + "unique_id": "macro.dbt.default__get_table_columns_and_constraints", + "macro_sql": "{% macro default__get_table_columns_and_constraints() -%}\n {{ return(table_columns_and_constraints()) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.table_columns_and_constraints"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2027445, + "supported_languages": null + }, + "macro.dbt.table_columns_and_constraints": { + "name": "table_columns_and_constraints", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/column/columns_spec_ddl.sql", + "original_file_path": "macros/relations/column/columns_spec_ddl.sql", + "unique_id": "macro.dbt.table_columns_and_constraints", + "macro_sql": "{% macro table_columns_and_constraints() %}\n {# loop through user_provided_columns to create DDL with data types and constraints #}\n {%- set raw_column_constraints = adapter.render_raw_columns_constraints(raw_columns=model['columns']) -%}\n {%- set raw_model_constraints = adapter.render_raw_model_constraints(raw_constraints=model['constraints']) -%}\n (\n {% for c in raw_column_constraints -%}\n {{ c }}{{ \",\" if not loop.last or raw_model_constraints }}\n {% endfor %}\n {% for c in raw_model_constraints -%}\n {{ c }}{{ \",\" if not loop.last }}\n {% endfor -%}\n )\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2037225, + "supported_languages": null + }, + "macro.dbt.get_assert_columns_equivalent": { + "name": "get_assert_columns_equivalent", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/column/columns_spec_ddl.sql", + "original_file_path": "macros/relations/column/columns_spec_ddl.sql", + "unique_id": "macro.dbt.get_assert_columns_equivalent", + "macro_sql": "\n\n{%- macro get_assert_columns_equivalent(sql) -%}\n {{ adapter.dispatch('get_assert_columns_equivalent', 'dbt')(sql) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.default__get_assert_columns_equivalent"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.204004, + "supported_languages": null + }, + "macro.dbt.default__get_assert_columns_equivalent": { + "name": "default__get_assert_columns_equivalent", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/column/columns_spec_ddl.sql", + "original_file_path": "macros/relations/column/columns_spec_ddl.sql", + "unique_id": "macro.dbt.default__get_assert_columns_equivalent", + "macro_sql": "{% macro default__get_assert_columns_equivalent(sql) -%}\n {{ return(assert_columns_equivalent(sql)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.assert_columns_equivalent"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2042098, + "supported_languages": null + }, + "macro.dbt.assert_columns_equivalent": { + "name": "assert_columns_equivalent", + 
"resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/column/columns_spec_ddl.sql", + "original_file_path": "macros/relations/column/columns_spec_ddl.sql", + "unique_id": "macro.dbt.assert_columns_equivalent", + "macro_sql": "{% macro assert_columns_equivalent(sql) %}\n\n {#-- First ensure the user has defined 'columns' in yaml specification --#}\n {%- set user_defined_columns = model['columns'] -%}\n {%- if not user_defined_columns -%}\n {{ exceptions.raise_contract_error([], []) }}\n {%- endif -%}\n\n {#-- Obtain the column schema provided by sql file. #}\n {%- set sql_file_provided_columns = get_column_schema_from_query(sql, config.get('sql_header', none)) -%}\n {#--Obtain the column schema provided by the schema file by generating an 'empty schema' query from the model's columns. #}\n {%- set schema_file_provided_columns = get_column_schema_from_query(get_empty_schema_sql(user_defined_columns)) -%}\n\n {#-- create dictionaries with name and formatted data type and strings for exception #}\n {%- set sql_columns = format_columns(sql_file_provided_columns) -%}\n {%- set yaml_columns = format_columns(schema_file_provided_columns) -%}\n\n {%- if sql_columns|length != yaml_columns|length -%}\n {%- do exceptions.raise_contract_error(yaml_columns, sql_columns) -%}\n {%- endif -%}\n\n {%- for sql_col in sql_columns -%}\n {%- set yaml_col = [] -%}\n {%- for this_col in yaml_columns -%}\n {%- if this_col['name'] == sql_col['name'] -%}\n {%- do yaml_col.append(this_col) -%}\n {%- break -%}\n {%- endif -%}\n {%- endfor -%}\n {%- if not yaml_col -%}\n {#-- Column with name not found in yaml #}\n {%- do exceptions.raise_contract_error(yaml_columns, sql_columns) -%}\n {%- endif -%}\n {%- if sql_col['formatted'] != yaml_col[0]['formatted'] -%}\n {#-- Column data types don't match #}\n {%- do exceptions.raise_contract_error(yaml_columns, sql_columns) -%}\n {%- endif -%}\n {%- endfor -%}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.get_column_schema_from_query", + "macro.dbt.get_empty_schema_sql", + "macro.dbt.format_columns" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.206296, + "supported_languages": null + }, + "macro.dbt.format_columns": { + "name": "format_columns", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/column/columns_spec_ddl.sql", + "original_file_path": "macros/relations/column/columns_spec_ddl.sql", + "unique_id": "macro.dbt.format_columns", + "macro_sql": "{% macro format_columns(columns) %}\n {% set formatted_columns = [] %}\n {% for column in columns %}\n {%- set formatted_column = adapter.dispatch('format_column', 'dbt')(column) -%}\n {%- do formatted_columns.append(formatted_column) -%}\n {% endfor %}\n {{ return(formatted_columns) }}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.codegen.format_column", + "macro.dbt.default__format_column" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.206867, + "supported_languages": null + }, + "macro.dbt.default__format_column": { + "name": "default__format_column", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/column/columns_spec_ddl.sql", + "original_file_path": "macros/relations/column/columns_spec_ddl.sql", + "unique_id": "macro.dbt.default__format_column", + "macro_sql": "{% macro 
default__format_column(column) -%}\n {% set data_type = column.dtype %}\n {% set formatted = column.column.lower() ~ \" \" ~ data_type %}\n {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }}\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2074356, + "supported_languages": null + }, + "macro.dbt.get_rename_view_sql": { + "name": "get_rename_view_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/rename.sql", + "original_file_path": "macros/relations/view/rename.sql", + "unique_id": "macro.dbt.get_rename_view_sql", + "macro_sql": "{% macro get_rename_view_sql(relation, new_name) %}\n {{- adapter.dispatch('get_rename_view_sql', 'dbt')(relation, new_name) -}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_rename_view_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2078557, + "supported_languages": null + }, + "macro.dbt.default__get_rename_view_sql": { + "name": "default__get_rename_view_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/rename.sql", + "original_file_path": "macros/relations/view/rename.sql", + "unique_id": "macro.dbt.default__get_rename_view_sql", + "macro_sql": "{% macro default__get_rename_view_sql(relation, new_name) %}\n {{ exceptions.raise_compiler_error(\n \"`get_rename_view_sql` has not been implemented for this adapter.\"\n ) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2080746, + "supported_languages": null + }, + "macro.dbt.drop_view": { + "name": "drop_view", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/drop.sql", + "original_file_path": "macros/relations/view/drop.sql", + "unique_id": "macro.dbt.drop_view", + "macro_sql": "{% macro drop_view(relation) -%}\n {{ return(adapter.dispatch('drop_view', 'dbt')(relation)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__drop_view"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2084463, + "supported_languages": null + }, + "macro.dbt.default__drop_view": { + "name": "default__drop_view", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/drop.sql", + "original_file_path": "macros/relations/view/drop.sql", + "unique_id": "macro.dbt.default__drop_view", + "macro_sql": "{% macro default__drop_view(relation) -%}\n drop view if exists {{ relation }} cascade\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2086046, + "supported_languages": null + }, + "macro.dbt.get_replace_view_sql": { + "name": "get_replace_view_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/replace.sql", + "original_file_path": "macros/relations/view/replace.sql", + "unique_id": "macro.dbt.get_replace_view_sql", + "macro_sql": "{% macro 
get_replace_view_sql(relation, sql) %}\n {{- adapter.dispatch('get_replace_view_sql', 'dbt')(relation, sql) -}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_replace_view_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2096982, + "supported_languages": null + }, + "macro.dbt.default__get_replace_view_sql": { + "name": "default__get_replace_view_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/replace.sql", + "original_file_path": "macros/relations/view/replace.sql", + "unique_id": "macro.dbt.default__get_replace_view_sql", + "macro_sql": "{% macro default__get_replace_view_sql(relation, sql) %}\n {{ exceptions.raise_compiler_error(\n \"`get_replace_view_sql` has not been implemented for this adapter.\"\n ) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2099397, + "supported_languages": null + }, + "macro.dbt.create_or_replace_view": { + "name": "create_or_replace_view", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/replace.sql", + "original_file_path": "macros/relations/view/replace.sql", + "unique_id": "macro.dbt.create_or_replace_view", + "macro_sql": "{% macro create_or_replace_view() %}\n {%- set identifier = model['alias'] -%}\n\n {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}\n {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}\n\n {%- set target_relation = api.Relation.create(\n identifier=identifier, schema=schema, database=database,\n type='view') -%}\n {% set grant_config = config.get('grants') %}\n\n {{ run_hooks(pre_hooks) }}\n\n -- If there's a table with the same name and we weren't told to full refresh,\n -- that's an error. If we were told to full refresh, drop it. 
This behavior differs\n -- for Snowflake and BigQuery, so multiple dispatch is used.\n {%- if old_relation is not none and old_relation.is_table -%}\n {{ handle_existing_table(should_full_refresh(), old_relation) }}\n {%- endif -%}\n\n -- build model\n {% call statement('main') -%}\n {{ get_create_view_as_sql(target_relation, sql) }}\n {%- endcall %}\n\n {% set should_revoke = should_revoke(exists_as_view, full_refresh_mode=True) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {{ run_hooks(post_hooks) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.run_hooks", + "macro.dbt.handle_existing_table", + "macro.dbt.should_full_refresh", + "macro.dbt.statement", + "macro.dbt.get_create_view_as_sql", + "macro.dbt.should_revoke", + "macro.dbt.apply_grants" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2116394, + "supported_languages": null + }, + "macro.dbt.handle_existing_table": { + "name": "handle_existing_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/replace.sql", + "original_file_path": "macros/relations/view/replace.sql", + "unique_id": "macro.dbt.handle_existing_table", + "macro_sql": "{% macro handle_existing_table(full_refresh, old_relation) %}\n {{ adapter.dispatch('handle_existing_table', 'dbt')(full_refresh, old_relation) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__handle_existing_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2119286, + "supported_languages": null + }, + "macro.dbt.default__handle_existing_table": { + "name": "default__handle_existing_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/replace.sql", + "original_file_path": "macros/relations/view/replace.sql", + "unique_id": "macro.dbt.default__handle_existing_table", + "macro_sql": "{% macro default__handle_existing_table(full_refresh, old_relation) %}\n {{ log(\"Dropping relation \" ~ old_relation ~ \" because it is of type \" ~ old_relation.type) }}\n {{ adapter.drop_relation(old_relation) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.212284, + "supported_languages": null + }, + "macro.dbt.get_create_view_as_sql": { + "name": "get_create_view_as_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/create.sql", + "original_file_path": "macros/relations/view/create.sql", + "unique_id": "macro.dbt.get_create_view_as_sql", + "macro_sql": "{% macro get_create_view_as_sql(relation, sql) -%}\n {{ adapter.dispatch('get_create_view_as_sql', 'dbt')(relation, sql) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_create_view_as_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2128475, + "supported_languages": null + }, + "macro.dbt.default__get_create_view_as_sql": { + "name": "default__get_create_view_as_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/create.sql", + 
"original_file_path": "macros/relations/view/create.sql", + "unique_id": "macro.dbt.default__get_create_view_as_sql", + "macro_sql": "{% macro default__get_create_view_as_sql(relation, sql) -%}\n {{ return(create_view_as(relation, sql)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.create_view_as"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2130923, + "supported_languages": null + }, + "macro.dbt.create_view_as": { + "name": "create_view_as", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/create.sql", + "original_file_path": "macros/relations/view/create.sql", + "unique_id": "macro.dbt.create_view_as", + "macro_sql": "{% macro create_view_as(relation, sql) -%}\n {{ adapter.dispatch('create_view_as', 'dbt')(relation, sql) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__create_view_as"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2133589, + "supported_languages": null + }, + "macro.dbt.default__create_view_as": { + "name": "default__create_view_as", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/view/create.sql", + "original_file_path": "macros/relations/view/create.sql", + "unique_id": "macro.dbt.default__create_view_as", + "macro_sql": "{% macro default__create_view_as(relation, sql) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n create view {{ relation }}\n {% set contract_config = config.get('contract') %}\n {% if contract_config.enforced %}\n {{ get_assert_columns_equivalent(sql) }}\n {%- endif %}\n as (\n {{ sql }}\n );\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.get_assert_columns_equivalent"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2140079, + "supported_languages": null + }, + "macro.dbt.get_rename_table_sql": { + "name": "get_rename_table_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/rename.sql", + "original_file_path": "macros/relations/table/rename.sql", + "unique_id": "macro.dbt.get_rename_table_sql", + "macro_sql": "{% macro get_rename_table_sql(relation, new_name) %}\n {{- adapter.dispatch('get_rename_table_sql', 'dbt')(relation, new_name) -}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_rename_table_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2144306, + "supported_languages": null + }, + "macro.dbt.default__get_rename_table_sql": { + "name": "default__get_rename_table_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/rename.sql", + "original_file_path": "macros/relations/table/rename.sql", + "unique_id": "macro.dbt.default__get_rename_table_sql", + "macro_sql": "{% macro default__get_rename_table_sql(relation, new_name) %}\n {{ exceptions.raise_compiler_error(\n \"`get_rename_table_sql` has not been implemented for this adapter.\"\n ) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + 
"patch_path": null, + "arguments": [], + "created_at": 1711458072.2146592, + "supported_languages": null + }, + "macro.dbt.drop_table": { + "name": "drop_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/drop.sql", + "original_file_path": "macros/relations/table/drop.sql", + "unique_id": "macro.dbt.drop_table", + "macro_sql": "{% macro drop_table(relation) -%}\n {{ return(adapter.dispatch('drop_table', 'dbt')(relation)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__drop_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2150266, + "supported_languages": null + }, + "macro.dbt.default__drop_table": { + "name": "default__drop_table", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/drop.sql", + "original_file_path": "macros/relations/table/drop.sql", + "unique_id": "macro.dbt.default__drop_table", + "macro_sql": "{% macro default__drop_table(relation) -%}\n drop table if exists {{ relation }} cascade\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.21519, + "supported_languages": null + }, + "macro.dbt.get_replace_table_sql": { + "name": "get_replace_table_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/replace.sql", + "original_file_path": "macros/relations/table/replace.sql", + "unique_id": "macro.dbt.get_replace_table_sql", + "macro_sql": "{% macro get_replace_table_sql(relation, sql) %}\n {{- adapter.dispatch('get_replace_table_sql', 'dbt')(relation, sql) -}}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_replace_table_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2155597, + "supported_languages": null + }, + "macro.dbt.default__get_replace_table_sql": { + "name": "default__get_replace_table_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/replace.sql", + "original_file_path": "macros/relations/table/replace.sql", + "unique_id": "macro.dbt.default__get_replace_table_sql", + "macro_sql": "{% macro default__get_replace_table_sql(relation, sql) %}\n {{ exceptions.raise_compiler_error(\n \"`get_replace_table_sql` has not been implemented for this adapter.\"\n ) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2159061, + "supported_languages": null + }, + "macro.dbt.get_create_table_as_sql": { + "name": "get_create_table_as_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/create.sql", + "original_file_path": "macros/relations/table/create.sql", + "unique_id": "macro.dbt.get_create_table_as_sql", + "macro_sql": "{% macro get_create_table_as_sql(temporary, relation, sql) -%}\n {{ adapter.dispatch('get_create_table_as_sql', 'dbt')(temporary, relation, sql) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_create_table_as_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + 
"arguments": [], + "created_at": 1711458072.2169352, + "supported_languages": null + }, + "macro.dbt.default__get_create_table_as_sql": { + "name": "default__get_create_table_as_sql", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/create.sql", + "original_file_path": "macros/relations/table/create.sql", + "unique_id": "macro.dbt.default__get_create_table_as_sql", + "macro_sql": "{% macro default__get_create_table_as_sql(temporary, relation, sql) -%}\n {{ return(create_table_as(temporary, relation, sql)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.create_table_as"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.217213, + "supported_languages": null + }, + "macro.dbt.create_table_as": { + "name": "create_table_as", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/create.sql", + "original_file_path": "macros/relations/table/create.sql", + "unique_id": "macro.dbt.create_table_as", + "macro_sql": "{% macro create_table_as(temporary, relation, compiled_code, language='sql') -%}\n {# backward compatibility for create_table_as that does not support language #}\n {% if language == \"sql\" %}\n {{ adapter.dispatch('create_table_as', 'dbt')(temporary, relation, compiled_code)}}\n {% else %}\n {{ adapter.dispatch('create_table_as', 'dbt')(temporary, relation, compiled_code, language) }}\n {% endif %}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_duckdb.duckdb__create_table_as"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2179232, + "supported_languages": null + }, + "macro.dbt.default__create_table_as": { + "name": "default__create_table_as", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/create.sql", + "original_file_path": "macros/relations/table/create.sql", + "unique_id": "macro.dbt.default__create_table_as", + "macro_sql": "{% macro default__create_table_as(temporary, relation, sql) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n\n create {% if temporary: -%}temporary{%- endif %} table\n {{ relation.include(database=(not temporary), schema=(not temporary)) }}\n {% set contract_config = config.get('contract') %}\n {% if contract_config.enforced and (not temporary) %}\n {{ get_assert_columns_equivalent(sql) }}\n {{ get_table_columns_and_constraints() }}\n {%- set sql = get_select_subquery(sql) %}\n {% endif %}\n as (\n {{ sql }}\n );\n{%- endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.get_assert_columns_equivalent", + "macro.dbt.get_table_columns_and_constraints", + "macro.dbt.get_select_subquery" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2189817, + "supported_languages": null + }, + "macro.dbt.default__get_column_names": { + "name": "default__get_column_names", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/create.sql", + "original_file_path": "macros/relations/table/create.sql", + "unique_id": "macro.dbt.default__get_column_names", + "macro_sql": "{% macro default__get_column_names() %}\n {#- loop through user_provided_columns to get column names -#}\n {%- set user_provided_columns 
= model['columns'] -%}\n {%- for i in user_provided_columns %}\n {%- set col = user_provided_columns[i] -%}\n {%- set col_name = adapter.quote(col['name']) if col.get('quote') else col['name'] -%}\n {{ col_name }}{{ \", \" if not loop.last }}\n {%- endfor -%}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2197435, + "supported_languages": null + }, + "macro.dbt.get_select_subquery": { + "name": "get_select_subquery", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/create.sql", + "original_file_path": "macros/relations/table/create.sql", + "unique_id": "macro.dbt.get_select_subquery", + "macro_sql": "{% macro get_select_subquery(sql) %}\n {{ return(adapter.dispatch('get_select_subquery', 'dbt')(sql)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.default__get_select_subquery"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2200282, + "supported_languages": null + }, + "macro.dbt.default__get_select_subquery": { + "name": "default__get_select_subquery", + "resource_type": "macro", + "package_name": "dbt", + "path": "macros/relations/table/create.sql", + "original_file_path": "macros/relations/table/create.sql", + "unique_id": "macro.dbt.default__get_select_subquery", + "macro_sql": "{% macro default__get_select_subquery(sql) %}\n select {{ adapter.dispatch('get_column_names', 'dbt')() }}\n from (\n {{ sql }}\n ) as model_subq\n{%- endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_duckdb.get_column_names", + "macro.dbt.default__get_column_names" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.220293, + "supported_languages": null + }, + "macro.dbt.test_unique": { + "name": "test_unique", + "resource_type": "macro", + "package_name": "dbt", + "path": "tests/generic/builtin.sql", + "original_file_path": "tests/generic/builtin.sql", + "unique_id": "macro.dbt.test_unique", + "macro_sql": "{% test unique(model, column_name) %}\n {% set macro = adapter.dispatch('test_unique', 'dbt') %}\n {{ macro(model, column_name) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt.default__test_unique"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2209854, + "supported_languages": null + }, + "macro.dbt.test_not_null": { + "name": "test_not_null", + "resource_type": "macro", + "package_name": "dbt", + "path": "tests/generic/builtin.sql", + "original_file_path": "tests/generic/builtin.sql", + "unique_id": "macro.dbt.test_not_null", + "macro_sql": "{% test not_null(model, column_name) %}\n {% set macro = adapter.dispatch('test_not_null', 'dbt') %}\n {{ macro(model, column_name) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt.default__test_not_null"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2213333, + "supported_languages": null + }, + "macro.dbt.test_accepted_values": { + "name": "test_accepted_values", + "resource_type": "macro", + "package_name": "dbt", + "path": "tests/generic/builtin.sql", + 
"original_file_path": "tests/generic/builtin.sql", + "unique_id": "macro.dbt.test_accepted_values", + "macro_sql": "{% test accepted_values(model, column_name, values, quote=True) %}\n {% set macro = adapter.dispatch('test_accepted_values', 'dbt') %}\n {{ macro(model, column_name, values, quote) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt.default__test_accepted_values"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2217479, + "supported_languages": null + }, + "macro.dbt.test_relationships": { + "name": "test_relationships", + "resource_type": "macro", + "package_name": "dbt", + "path": "tests/generic/builtin.sql", + "original_file_path": "tests/generic/builtin.sql", + "unique_id": "macro.dbt.test_relationships", + "macro_sql": "{% test relationships(model, column_name, to, field) %}\n {% set macro = adapter.dispatch('test_relationships', 'dbt') %}\n {{ macro(model, column_name, to, field) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt.default__test_relationships"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2221446, + "supported_languages": null + }, + "macro.codegen.generate_model_import_ctes": { + "name": "generate_model_import_ctes", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/generate_model_import_ctes.sql", + "original_file_path": "macros/generate_model_import_ctes.sql", + "unique_id": "macro.codegen.generate_model_import_ctes", + "macro_sql": "{% macro generate_model_import_ctes(model_name, leading_commas = false) %}\n\n {%- if execute -%}\n {%- set nodes = graph.nodes.values() -%}\n\n {%- set model = (nodes\n | selectattr('name', 'equalto', model_name) \n | selectattr('resource_type', 'equalto', 'model')\n | list).pop() -%}\n\n {%- set model_raw_sql = model.raw_sql or model.raw_code -%}\n {%- else -%}\n {%- set model_raw_sql = '' -%}\n {%- endif -%}\n\n {#-\n\n REGEX Explanations\n\n # with_regex\n - matches (start of file followed by anything then whitespace\n or whitespace\n or a comma) followed by the word with then a space \n\n # from_ref \n - matches (from or join) followed by some spaces and then {{ref()}}\n\n # from_source \n - matches (from or join) followed by some spaces and then {{source(,)}}\n\n # from_var_1\n - matches (from or join) followed by some spaces and then {{var()}}\n\n # from_var_2\n - matches (from or join) followed by some spaces and then {{var(,)}}\n\n # from_table_1\n - matches (from or join) followed by some spaces and then .\n where each is enclosed by (` or [ or \" or ' or nothing)\n\n # from_table_2\n - matches (from or join) followed by some spaces and then ..\n where each is enclosed by (` or [ or \" or ' or nothing)\n\n # from_table_3\n - matches (from or join) followed by some spaces and then \n where is enclosed by (` or [ or \" or ')\n\n # config block\n - matches the start of the file followed by anything and then {{config()}}\n\n -#}\n\n {%- set re = modules.re -%}\n\n {%- set with_regex = '(?i)(?s)(^.*\\s*|\\s+|,)with\\s' -%}\n {%- set does_raw_sql_contain_cte = re.search(with_regex, model_raw_sql) -%}\n\n {%- set from_regexes = {\n 'from_ref':\n '(?ix)\n\n # first matching group\n # from or join followed by at least 1 whitespace character\n (from|join)\\s+\n\n # second matching group\n # opening {{, 0 or more whitespace character(s), ref, 0 or more 
whitespace character(s), an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark\n ({{\\s*ref\\s*\\(\\s*[\\'\\\"]?)\n \n # third matching group\n # at least 1 of anything except a parenthesis or quotation mark\n ([^)\\'\\\"]+)\n \n # fourth matching group\n # 1 or 0 quotation mark, 0 or more whitespace character(s)\n ([\\'\\\"]?\\s*)\n\n # fifth matching group\n # a closing parenthesis, 0 or more whitespace character(s), closing }}\n (\\)\\s*}})\n \n ',\n 'from_source':\n '(?ix)\n\n # first matching group\n # from or join followed by at least 1 whitespace character\n (from|join)\\s+\n\n # second matching group\n # opening {{, 0 or more whitespace character(s), source, 0 or more whitespace character(s), an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark\n ({{\\s*source\\s*\\(\\s*[\\'\\\"]?)\n\n # third matching group\n # at least 1 of anything except a parenthesis or quotation mark\n ([^)\\'\\\"]+)\n\n # fourth matching group\n # 1 or 0 quotation mark, 0 or more whitespace character(s)\n ([\\'\\\"]?\\s*)\n\n # fifth matching group\n # a comma\n (,)\n\n # sixth matching group\n # 0 or more whitespace character(s), 1 or 0 quotation mark\n (\\s*[\\'\\\"]?)\n\n # seventh matching group\n # at least 1 of anything except a parenthesis or quotation mark\n ([^)\\'\\\"]+)\n\n # eighth matching group\n # 1 or 0 quotation mark, 0 or more whitespace character(s)\n ([\\'\\\"]?\\s*)\n\n # ninth matching group\n # a closing parenthesis, 0 or more whitespace character(s), closing }}\n (\\)\\s*}})\n\n ',\n 'from_var_1':\n '(?ix)\n\n # first matching group\n # from or join followed by at least 1 whitespace character\n (from|join)\\s+\n\n # second matching group\n # opening {{, 0 or more whitespace character(s), var, 0 or more whitespace character(s), an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark\n ({{\\s*var\\s*\\(\\s*[\\'\\\"]?)\n\n # third matching group\n # at least 1 of anything except a parenthesis or quotation mark\n ([^)\\'\\\"]+)\n\n # fourth matching group\n # 1 or 0 quotation mark, 0 or more whitespace character(s)\n ([\\'\\\"]?\\s*)\n\n # fifth matching group\n # a closing parenthesis, 0 or more whitespace character(s), closing }}\n (\\)\\s*}})\n \n ',\n 'from_var_2':\n '(?ix)\n\n # first matching group\n # from or join followed by at least 1 whitespace character\n (from|join)\\s+\n \n # second matching group\n # opening {{, 0 or more whitespace character(s), var, 0 or more whitespace character(s), an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark\n ({{\\s*var\\s*\\(\\s*[\\'\\\"]?)\n\n # third matching group\n # at least 1 of anything except a parenthesis or quotation mark \n ([^)\\'\\\"]+)\n \n # fourth matching group\n # 1 or 0 quotation mark, 0 or more whitespace character(s)\n ([\\'\\\"]?\\s*)\n\n # fifth matching group\n # a comma\n (,)\n\n # sixth matching group\n # 0 or more whitespace character(s), 1 or 0 quotation mark \n (\\s*[\\'\\\"]?)\n\n # seventh matching group\n # at least 1 of anything except a parenthesis or quotation mark \n ([^)\\'\\\"]+)\n\n # eighth matching group\n # 1 or 0 quotation mark, 0 or more whitespace character(s) \n ([\\'\\\"]?\\s*)\n\n # ninth matching group\n # a closing parenthesis, 0 or more whitespace character(s), closing }} \n (\\)\\s*}})\n \n ',\n 'from_table_1':\n '(?ix)\n \n # first matching group\n # from or join followed by at least 1 whitespace character \n (from|join)\\s+\n \n # second matching group\n # 1 or 0 of (opening bracket, 
backtick, or quotation mark)\n ([\\[`\\\"\\']?)\n \n # third matching group\n # at least 1 word character\n (\\w+)\n \n # fouth matching group\n # 1 or 0 of (closing bracket, backtick, or quotation mark)\n ([\\]`\\\"\\']?)\n \n # fifth matching group\n # a period\n (\\.)\n \n # sixth matching group\n # 1 or 0 of (opening bracket, backtick, or quotation mark)\n ([\\[`\\\"\\']?)\n \n # seventh matching group\n # at least 1 word character\n (\\w+)\n \n # eighth matching group\n # 1 or 0 of (closing bracket, backtick, or quotation mark) folowed by a whitespace character or end of string\n ([\\]`\\\"\\']?)(?=\\s|$)\n \n ',\n 'from_table_2':\n '(?ix)\n\n # first matching group\n # from or join followed by at least 1 whitespace character \n (from|join)\\s+\n \n # second matching group\n # 1 or 0 of (opening bracket, backtick, or quotation mark) \n ([\\[`\\\"\\']?)\n \n # third matching group\n # at least 1 word character\n (\\w+)\n\n # fouth matching group\n # 1 or 0 of (closing bracket, backtick, or quotation mark) \n ([\\]`\\\"\\']?)\n \n # fifth matching group\n # a period \n (\\.)\n \n # sixth matching group\n # 1 or 0 of (opening bracket, backtick, or quotation mark)\n ([\\[`\\\"\\']?)\n\n # seventh matching group\n # at least 1 word character \n (\\w+)\n \n # eighth matching group\n # 1 or 0 of (closing bracket, backtick, or quotation mark) \n ([\\]`\\\"\\']?)\n \n # ninth matching group\n # a period \n (\\.)\n \n # tenth matching group\n # 1 or 0 of (closing bracket, backtick, or quotation mark) \n ([\\[`\\\"\\']?)\n \n # eleventh matching group\n # at least 1 word character \n (\\w+)\n\n # twelfth matching group\n # 1 or 0 of (closing bracket, backtick, or quotation mark) folowed by a whitespace character or end of string\n ([\\]`\\\"\\']?)(?=\\s|$)\n \n ',\n 'from_table_3':\n '(?ix)\n\n # first matching group\n # from or join followed by at least 1 whitespace character \n (from|join)\\s+\n \n # second matching group\n # 1 or 0 of (opening bracket, backtick, or quotation mark) \n ([\\[`\\\"\\'])\n \n # third matching group\n # at least 1 word character or space \n ([\\w ]+)\n\n # fourth matching group\n # 1 or 0 of (closing bracket, backtick, or quotation mark) folowed by a whitespace character or end of string\n ([\\]`\\\"\\'])(?=\\s|$)\n \n ',\n 'config_block':'(?i)(?s)^.*{{\\s*config\\s*\\([^)]+\\)\\s*}}'\n } -%}\n\n {%- set from_list = [] -%}\n {%- set config_list = [] -%}\n {%- set ns = namespace(model_sql = model_raw_sql) -%}\n\n {%- for regex_name, regex_pattern in from_regexes.items() -%}\n\n {%- set all_regex_matches = re.findall(regex_pattern, model_raw_sql) -%}\n\n {%- for match in all_regex_matches -%}\n\n {%- if regex_name == 'config_block' -%}\n {%- set match_tuple = (match|trim, regex_name) -%}\n {%- do config_list.append(match_tuple) -%}\n {%- elif regex_name == 'from_source' -%} \n {%- set full_from_clause = match[1:]|join|trim -%}\n {%- set cte_name = 'source_' + match[6]|lower -%}\n {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%}\n {%- do from_list.append(match_tuple) -%} \n {%- elif regex_name == 'from_table_1' -%}\n {%- set full_from_clause = match[1:]|join()|trim -%}\n {%- set cte_name = match[2]|lower + '_' + match[6]|lower -%}\n {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%}\n {%- do from_list.append(match_tuple) -%} \n {%- elif regex_name == 'from_table_2' -%}\n {%- set full_from_clause = match[1:]|join()|trim -%}\n {%- set cte_name = match[2]|lower + '_' + match[6]|lower + '_' + match[10]|lower -%}\n {%- set match_tuple = 
(cte_name, full_from_clause, regex_name) -%}\n {%- do from_list.append(match_tuple) -%} \n {%- else -%}\n {%- set full_from_clause = match[1:]|join|trim -%}\n {%- set cte_name = match[2]|trim|lower -%}\n {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%}\n {%- do from_list.append(match_tuple) -%}\n {%- endif -%}\n\n {%- endfor -%}\n\n {%- if regex_name == 'config_block' -%}\n {%- elif regex_name == 'from_source' -%}\n {%- set ns.model_sql = re.sub(regex_pattern, '\\g<1> source_\\g<7>', ns.model_sql) -%} \n {%- elif regex_name == 'from_table_1' -%}\n {%- set ns.model_sql = re.sub(regex_pattern, '\\g<1> \\g<3>_\\g<7>', ns.model_sql) -%} \n {%- elif regex_name == 'from_table_2' -%}\n {%- set ns.model_sql = re.sub(regex_pattern, '\\g<1> \\g<3>_\\g<7>_\\g<11>', ns.model_sql) -%} \n {%- else -%} \n {%- set ns.model_sql = re.sub(regex_pattern, '\\g<1> \\g<3>', ns.model_sql) -%} \n {% endif %}\n\n {%- endfor -%}\n\n{%- if from_list|length > 0 -%}\n\n{%- set model_import_ctes -%}\n\n {%- for config_obj in config_list -%}\n\n {%- set ns.model_sql = ns.model_sql|replace(config_obj[0], '') -%}\n\n{{ config_obj[0] }}\n\n{% endfor -%}\n\n {%- for from_obj in from_list|unique|sort -%}\n\n{%- if loop.first -%}with {% else -%}{%- if leading_commas -%},{%- endif -%}{%- endif -%}{{ from_obj[0] }} as (\n\n select * from {{ from_obj[1] }}\n {%- if from_obj[2] == 'from_source' and from_list|length > 1 %} \n -- CAUTION: It's best practice to create staging layer for raw sources\n {%- elif from_obj[2] == 'from_table_1' or from_obj[2] == 'from_table_2' or from_obj[2] == 'from_table_3' %}\n -- CAUTION: It's best practice to use the ref or source function instead of a direct reference\n {%- elif from_obj[2] == 'from_var_1' or from_obj[2] == 'from_var_2' %}\n -- CAUTION: It's best practice to use the ref or source function instead of a var\n {%- endif %}\n \n){%- if ((loop.last and does_raw_sql_contain_cte) or (not loop.last)) and not leading_commas -%},{%- endif %}\n\n{% endfor -%}\n\n{%- if does_raw_sql_contain_cte -%}\n {%- if leading_commas -%}\n {%- set replace_with = '\\g<1>,' -%}\n {%- else -%}\n {%- set replace_with = '\\g<1>' -%}\n {%- endif -%}\n{{ re.sub(with_regex, replace_with, ns.model_sql, 1)|trim }}\n{%- else -%}\n{{ ns.model_sql|trim }}\n{%- endif -%}\n\n{%- endset -%}\n\n{%- else -%}\n\n{% set model_import_ctes = model_raw_sql %}\n\n{%- endif -%}\n\n{%- if execute -%}\n\n{{ log(model_import_ctes, info=True) }}\n{% do return(model_import_ctes) %}\n\n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.236276, + "supported_languages": null + }, + "macro.codegen.get_tables_in_schema": { + "name": "get_tables_in_schema", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/generate_source.sql", + "original_file_path": "macros/generate_source.sql", + "unique_id": "macro.codegen.get_tables_in_schema", + "macro_sql": "{% macro get_tables_in_schema(schema_name, database_name=target.database, table_pattern='%', exclude='') %}\n \n {% set tables=dbt_utils.get_relations_by_pattern(\n schema_pattern=schema_name,\n database=database_name,\n table_pattern=table_pattern,\n exclude=exclude\n ) %}\n\n {% set table_list= tables | map(attribute='identifier') %}\n\n {{ return(table_list | sort) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.get_relations_by_pattern"] + }, + 
"description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2389948, + "supported_languages": null + }, + "macro.codegen.generate_source": { + "name": "generate_source", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/generate_source.sql", + "original_file_path": "macros/generate_source.sql", + "unique_id": "macro.codegen.generate_source", + "macro_sql": "{% macro generate_source(schema_name, database_name=target.database, generate_columns=False, include_descriptions=False, include_data_types=True, table_pattern='%', exclude='', name=schema_name, table_names=None, include_database=False, include_schema=False) %}\n\n{% set sources_yaml=[] %}\n{% do sources_yaml.append('version: 2') %}\n{% do sources_yaml.append('') %}\n{% do sources_yaml.append('sources:') %}\n{% do sources_yaml.append(' - name: ' ~ name | lower) %}\n\n{% if include_descriptions %}\n {% do sources_yaml.append(' description: \"\"' ) %}\n{% endif %}\n\n{% if database_name != target.database or include_database %}\n{% do sources_yaml.append(' database: ' ~ database_name | lower) %}\n{% endif %}\n\n{% if schema_name != name or include_schema %}\n{% do sources_yaml.append(' schema: ' ~ schema_name | lower) %}\n{% endif %}\n\n{% do sources_yaml.append(' tables:') %}\n\n{% if table_names is none %}\n{% set tables=codegen.get_tables_in_schema(schema_name, database_name, table_pattern, exclude) %}\n{% else %}\n{% set tables = table_names %}\n{% endif %}\n\n{% for table in tables %}\n {% do sources_yaml.append(' - name: ' ~ table | lower ) %}\n {% if include_descriptions %}\n {% do sources_yaml.append(' description: \"\"' ) %}\n {% endif %}\n {% if generate_columns %}\n {% do sources_yaml.append(' columns:') %}\n\n {% set table_relation=api.Relation.create(\n database=database_name,\n schema=schema_name,\n identifier=table\n ) %}\n\n {% set columns=adapter.get_columns_in_relation(table_relation) %}\n\n {% for column in columns %}\n {% do sources_yaml.append(' - name: ' ~ column.name | lower ) %}\n {% if include_data_types %}\n {% do sources_yaml.append(' data_type: ' ~ codegen.data_type_format_source(column)) %}\n {% endif %}\n {% if include_descriptions %}\n {% do sources_yaml.append(' description: \"\"' ) %}\n {% endif %}\n {% endfor %}\n {% do sources_yaml.append('') %}\n\n {% endif %}\n\n{% endfor %}\n\n{% if execute %}\n\n {% set joined = sources_yaml | join ('\\n') %}\n {{ log(joined, info=True) }}\n {% do return(joined) %}\n\n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.codegen.get_tables_in_schema", + "macro.codegen.data_type_format_source" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2432053, + "supported_languages": null + }, + "macro.codegen.generate_base_model": { + "name": "generate_base_model", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/generate_base_model.sql", + "original_file_path": "macros/generate_base_model.sql", + "unique_id": "macro.codegen.generate_base_model", + "macro_sql": "{% macro generate_base_model(source_name, table_name, leading_commas=False, case_sensitive_cols=False, materialized=None) %}\n\n{%- set source_relation = source(source_name, table_name) -%}\n\n{%- set columns = adapter.get_columns_in_relation(source_relation) -%}\n{% set column_names=columns | map(attribute='name') %}\n{% set base_model_sql 
%}\n\n{%- if materialized is not none -%}\n {{ \"{{ config(materialized='\" ~ materialized ~ \"') }}\" }}\n{%- endif %}\n\nwith source as (\n\n select * from {% raw %}{{ source({% endraw %}'{{ source_name }}', '{{ table_name }}'{% raw %}) }}{% endraw %}\n\n),\n\nrenamed as (\n\n select\n {%- if leading_commas -%}\n {%- for column in column_names %}\n {{\", \" if not loop.first}}{% if not case_sensitive_cols %}{{ column | lower }}{% elif target.type == \"bigquery\" %}{{ column }}{% else %}{{ \"\\\"\" ~ column ~ \"\\\"\" }}{% endif %}\n {%- endfor %}\n {%- else -%}\n {%- for column in column_names %}\n {% if not case_sensitive_cols %}{{ column | lower }}{% elif target.type == \"bigquery\" %}{{ column }}{% else %}{{ \"\\\"\" ~ column ~ \"\\\"\" }}{% endif %}{{\",\" if not loop.last}}\n {%- endfor -%}\n {%- endif %}\n\n from source\n\n)\n\nselect * from renamed\n{% endset %}\n\n{% if execute %}\n\n{{ log(base_model_sql, info=True) }}\n{% do return(base_model_sql) %}\n\n{% endif %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2462223, + "supported_languages": null + }, + "macro.codegen.create_base_models": { + "name": "create_base_models", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/create_base_models.sql", + "original_file_path": "macros/create_base_models.sql", + "unique_id": "macro.codegen.create_base_models", + "macro_sql": "{% macro create_base_models(source_name, tables) %}\n\n{% set source_name = \"\"~ source_name ~\"\" %}\n\n{% set zsh_command_models = \"source dbt_packages/codegen/bash_scripts/base_model_creation.sh \"\"\"~ source_name ~\"\"\" \" %}\n\n{%- set models_array = [] -%}\n\n{% for t in tables %}\n {% set help_command = zsh_command_models + t %}\n {{ models_array.append(help_command) }}\n{% endfor %}\n\n{{ log(\"Run these commands in your shell to generate the models:\\n\" ~ models_array|join(' && \\n'), info=True) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.247253, + "supported_languages": null + }, + "macro.codegen.generate_column_yaml": { + "name": "generate_column_yaml", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/generate_model_yaml.sql", + "original_file_path": "macros/generate_model_yaml.sql", + "unique_id": "macro.codegen.generate_column_yaml", + "macro_sql": "{% macro generate_column_yaml(column, model_yaml, column_desc_dict, include_data_types, parent_column_name=\"\") %}\n {% if parent_column_name %}\n {% set column_name = parent_column_name ~ \".\" ~ column.name %}\n {% else %}\n {% set column_name = column.name %}\n {% endif %}\n\n {% do model_yaml.append(' - name: ' ~ column_name | lower ) %}\n {% if include_data_types %}\n {% do model_yaml.append(' data_type: ' ~ codegen.data_type_format_model(column)) %}\n {% endif %}\n {% do model_yaml.append(' description: \"' ~ column_desc_dict.get(column.name | lower,'') ~ '\"') %}\n {% do model_yaml.append('') %}\n\n {% if column.fields|length > 0 %}\n {% for child_column in column.fields %}\n {% set model_yaml = codegen.generate_column_yaml(child_column, model_yaml, column_desc_dict, include_data_types, parent_column_name=column_name) %}\n {% endfor %}\n {% endif %}\n {% do return(model_yaml) %}\n{% endmacro %}", + 
"depends_on": { + "macros": [ + "macro.codegen.data_type_format_model", + "macro.codegen.generate_column_yaml" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2503564, + "supported_languages": null + }, + "macro.codegen.generate_model_yaml": { + "name": "generate_model_yaml", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/generate_model_yaml.sql", + "original_file_path": "macros/generate_model_yaml.sql", + "unique_id": "macro.codegen.generate_model_yaml", + "macro_sql": "{% macro generate_model_yaml(model_names=[], upstream_descriptions=False, include_data_types=True) %}\n\n {% set model_yaml=[] %}\n\n {% do model_yaml.append('version: 2') %}\n {% do model_yaml.append('') %}\n {% do model_yaml.append('models:') %}\n\n {% if model_names is string %}\n {{ exceptions.raise_compiler_error(\"The `model_names` argument must always be a list, even if there is only one model.\") }}\n {% else %}\n {% for model in model_names %}\n {% do model_yaml.append(' - name: ' ~ model | lower) %}\n {% do model_yaml.append(' description: \"\"') %}\n {% do model_yaml.append(' columns:') %}\n\n {% set relation=ref(model) %}\n {%- set columns = adapter.get_columns_in_relation(relation) -%}\n {% set column_desc_dict = codegen.build_dict_column_descriptions(model) if upstream_descriptions else {} %}\n\n {% for column in columns %}\n {% set model_yaml = codegen.generate_column_yaml(column, model_yaml, column_desc_dict, include_data_types) %}\n {% endfor %}\n {% endfor %}\n {% endif %}\n\n{% if execute %}\n\n {% set joined = model_yaml | join ('\\n') %}\n {{ log(joined, info=True) }}\n {% do return(joined) %}\n\n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.codegen.build_dict_column_descriptions", + "macro.codegen.generate_column_yaml" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.252536, + "supported_languages": null + }, + "macro.codegen.get_model_dependencies": { + "name": "get_model_dependencies", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/helpers/helpers.sql", + "original_file_path": "macros/helpers/helpers.sql", + "unique_id": "macro.codegen.get_model_dependencies", + "macro_sql": "{% macro get_model_dependencies(model_name) %}\n {% for node in graph.nodes.values() | selectattr('name', \"equalto\", model_name) %}\n {{ return(node.depends_on.nodes) }}\n {% endfor %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2542744, + "supported_languages": null + }, + "macro.codegen.add_model_column_descriptions_to_dict": { + "name": "add_model_column_descriptions_to_dict", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/helpers/helpers.sql", + "original_file_path": "macros/helpers/helpers.sql", + "unique_id": "macro.codegen.add_model_column_descriptions_to_dict", + "macro_sql": "{% macro add_model_column_descriptions_to_dict(model_name,dict_with_descriptions={}) %}\n {% for node in graph.nodes.values() | selectattr('name', \"equalto\", model_name) %}\n {% for col_name, col_values in node.columns.items() %}\n {% do dict_with_descriptions.update( {col_name: col_values.description} ) %}\n {% endfor %}\n {% endfor %}\n 
{{ return(dict_with_descriptions) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2550848, + "supported_languages": null + }, + "macro.codegen.build_dict_column_descriptions": { + "name": "build_dict_column_descriptions", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/helpers/helpers.sql", + "original_file_path": "macros/helpers/helpers.sql", + "unique_id": "macro.codegen.build_dict_column_descriptions", + "macro_sql": "{% macro build_dict_column_descriptions(model_name) %}\n {% if execute %}\n {% set glob_dict = {} %}\n {% for full_model in codegen.get_model_dependencies(model_name) %}\n {% do codegen.add_model_column_descriptions_to_dict(full_model.split('.')[-1],glob_dict) %}\n {% endfor %}\n {{ return(glob_dict) }}\n {% endif %}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.codegen.get_model_dependencies", + "macro.codegen.add_model_column_descriptions_to_dict" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2557795, + "supported_languages": null + }, + "macro.codegen.get_models": { + "name": "get_models", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/helpers/helpers.sql", + "original_file_path": "macros/helpers/helpers.sql", + "unique_id": "macro.codegen.get_models", + "macro_sql": "{% macro get_models(directory=None, prefix=None) %}\n {% set model_names=[] %}\n {% set models = graph.nodes.values() | selectattr('resource_type', \"equalto\", 'model') %}\n {% if directory and prefix %}\n {% for model in models %}\n {% set model_path = \"/\".join(model.path.split(\"/\")[:-1]) %}\n {% if model_path == directory and model.name.startswith(prefix) %}\n {% do model_names.append(model.name) %}\n {% endif %} \n {% endfor %}\n {% elif directory %}\n {% for model in models %}\n {% set model_path = \"/\".join(model.path.split(\"/\")[:-1]) %}\n {% if model_path == directory %}\n {% do model_names.append(model.name) %}\n {% endif %}\n {% endfor %}\n {% elif prefix %}\n {% for model in models if model.name.startswith(prefix) %}\n {% do model_names.append(model.name) %}\n {% endfor %}\n {% else %}\n {% for model in models %}\n {% do model_names.append(model.name) %}\n {% endfor %}\n {% endif %}\n {{ return(model_names) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2580016, + "supported_languages": null + }, + "macro.codegen.data_type_format_source": { + "name": "data_type_format_source", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/helpers/helpers.sql", + "original_file_path": "macros/helpers/helpers.sql", + "unique_id": "macro.codegen.data_type_format_source", + "macro_sql": "{% macro data_type_format_source(column) -%}\n {{ return(adapter.dispatch('data_type_format_source', 'codegen')(column)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.codegen.default__data_type_format_source"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2582939, + "supported_languages": null + }, + "macro.codegen.default__data_type_format_source": { + 
"name": "default__data_type_format_source", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/helpers/helpers.sql", + "original_file_path": "macros/helpers/helpers.sql", + "unique_id": "macro.codegen.default__data_type_format_source", + "macro_sql": "{% macro default__data_type_format_source(column) %}\n {% set formatted = codegen.format_column(column) %}\n {{ return(formatted['data_type'] | lower) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.codegen.format_column"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2586248, + "supported_languages": null + }, + "macro.codegen.data_type_format_model": { + "name": "data_type_format_model", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/helpers/helpers.sql", + "original_file_path": "macros/helpers/helpers.sql", + "unique_id": "macro.codegen.data_type_format_model", + "macro_sql": "{% macro data_type_format_model(column) -%}\n {{ return(adapter.dispatch('data_type_format_model', 'codegen')(column)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.codegen.default__data_type_format_model"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2588959, + "supported_languages": null + }, + "macro.codegen.default__data_type_format_model": { + "name": "default__data_type_format_model", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/helpers/helpers.sql", + "original_file_path": "macros/helpers/helpers.sql", + "unique_id": "macro.codegen.default__data_type_format_model", + "macro_sql": "{% macro default__data_type_format_model(column) %}\n {% set formatted = codegen.format_column(column) %}\n {{ return(formatted['data_type'] | lower) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.codegen.format_column"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2592497, + "supported_languages": null + }, + "macro.codegen.format_column": { + "name": "format_column", + "resource_type": "macro", + "package_name": "codegen", + "path": "macros/vendored/dbt_core/format_column.sql", + "original_file_path": "macros/vendored/dbt_core/format_column.sql", + "unique_id": "macro.codegen.format_column", + "macro_sql": "{% macro format_column(column) -%}\n {% set data_type = column.dtype %}\n {% set formatted = column.column.lower() ~ \" \" ~ data_type %}\n {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }}\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2598784, + "supported_languages": null + }, + "macro.dbt_profiler.print_profile": { + "name": "print_profile", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/print_profile.sql", + "original_file_path": "macros/print_profile.sql", + "unique_id": "macro.dbt_profiler.print_profile", + "macro_sql": "{% macro print_profile(relation=none, relation_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], max_rows=none, max_columns=13, max_column_width=30, max_precision=none, where_clause=none) %}\n\n{%- set results = 
dbt_profiler.get_profile_table(relation=relation, relation_name=relation_name, schema=schema, database=database, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns, where_clause=where_clause) -%}\n\n{% if execute %}\n {% do results.print_table(max_rows=max_rows, max_columns=max_columns, max_column_width=max_column_width, max_precision=max_precision) %}\n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_profiler.get_profile_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2610776, + "supported_languages": null + }, + "macro.dbt_profiler.type_string": { + "name": "type_string", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.type_string", + "macro_sql": "\n\n{%- macro type_string() -%}\n {{ return(adapter.dispatch(\"type_string\", macro_namespace=\"dbt_profiler\")()) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__type_string"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2624183, + "supported_languages": null + }, + "macro.dbt_profiler.default__type_string": { + "name": "default__type_string", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.default__type_string", + "macro_sql": "{%- macro default__type_string() -%}\n varchar\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2625406, + "supported_languages": null + }, + "macro.dbt_profiler.bigquery__type_string": { + "name": "bigquery__type_string", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.bigquery__type_string", + "macro_sql": "{%- macro bigquery__type_string() -%}\n string\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2626708, + "supported_languages": null + }, + "macro.dbt_profiler.databricks__type_string": { + "name": "databricks__type_string", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.databricks__type_string", + "macro_sql": "{%- macro databricks__type_string() -%}\n string\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2628047, + "supported_languages": null + }, + "macro.dbt_profiler.is_numeric_dtype": { + "name": "is_numeric_dtype", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": 
"macro.dbt_profiler.is_numeric_dtype", + "macro_sql": "\n\n{%- macro is_numeric_dtype(dtype) -%}\n {{ return(adapter.dispatch(\"is_numeric_dtype\", macro_namespace=\"dbt_profiler\")(dtype)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__is_numeric_dtype"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2630851, + "supported_languages": null + }, + "macro.dbt_profiler.default__is_numeric_dtype": { + "name": "default__is_numeric_dtype", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.default__is_numeric_dtype", + "macro_sql": "{%- macro default__is_numeric_dtype(dtype) -%}\n {% set is_numeric = dtype.startswith(\"int\") or dtype.startswith(\"float\") or \"numeric\" in dtype or \"number\" in dtype or \"double\" in dtype or \"bigint\" in dtype %}\n {% do return(is_numeric) %}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2636654, + "supported_languages": null + }, + "macro.dbt_profiler.sqlserver__is_numeric_dtype": { + "name": "sqlserver__is_numeric_dtype", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.sqlserver__is_numeric_dtype", + "macro_sql": "{%- macro sqlserver__is_numeric_dtype(dtype) -%}\n {% set is_numeric = dtype in [\"decimal\", \"numeric\", \"bigint\" \"numeric\", \"smallint\", \"decimal\", \"int\", \"tinyint\", \"money\", \"float\", \"real\"] %}\n {% do return(is_numeric) %}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2641292, + "supported_languages": null + }, + "macro.dbt_profiler.is_logical_dtype": { + "name": "is_logical_dtype", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.is_logical_dtype", + "macro_sql": "\n\n{%- macro is_logical_dtype(dtype) -%}\n {{ return(adapter.dispatch(\"is_logical_dtype\", macro_namespace=\"dbt_profiler\")(dtype)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__is_logical_dtype"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2644322, + "supported_languages": null + }, + "macro.dbt_profiler.default__is_logical_dtype": { + "name": "default__is_logical_dtype", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.default__is_logical_dtype", + "macro_sql": "{%- macro default__is_logical_dtype(dtype) -%}\n {% set is_bool = dtype.startswith(\"bool\") %}\n {% do return(is_bool) %}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + 
"arguments": [], + "created_at": 1711458072.264755, + "supported_languages": null + }, + "macro.dbt_profiler.is_date_or_time_dtype": { + "name": "is_date_or_time_dtype", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.is_date_or_time_dtype", + "macro_sql": "\n\n{%- macro is_date_or_time_dtype(dtype) -%}\n {{ return(adapter.dispatch(\"is_date_or_time_dtype\", macro_namespace=\"dbt_profiler\")(dtype)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__is_date_or_time_dtype"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2650356, + "supported_languages": null + }, + "macro.dbt_profiler.default__is_date_or_time_dtype": { + "name": "default__is_date_or_time_dtype", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.default__is_date_or_time_dtype", + "macro_sql": "{%- macro default__is_date_or_time_dtype(dtype) -%}\n {% set is_date_or_time = dtype.startswith(\"timestamp\") or dtype.startswith(\"date\") %}\n {% do return(is_date_or_time) %}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2653792, + "supported_languages": null + }, + "macro.dbt_profiler.is_struct_dtype": { + "name": "is_struct_dtype", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.is_struct_dtype", + "macro_sql": "\n\n{%- macro is_struct_dtype(dtype) -%}\n {{ return(adapter.dispatch(\"is_struct_dtype\", macro_namespace=\"dbt_profiler\")(dtype)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__is_struct_dtype"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.265653, + "supported_languages": null + }, + "macro.dbt_profiler.default__is_struct_dtype": { + "name": "default__is_struct_dtype", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.default__is_struct_dtype", + "macro_sql": "{%- macro default__is_struct_dtype(dtype) -%}\n {% do return((dtype | lower).startswith('struct')) %}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.265906, + "supported_languages": null + }, + "macro.dbt_profiler.information_schema": { + "name": "information_schema", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.information_schema", + "macro_sql": "\n\n{%- macro information_schema(relation) -%}\n {{ return(adapter.dispatch(\"information_schema\", macro_namespace=\"dbt_profiler\")(relation)) }}\n{%- endmacro -%}\n\n", + "depends_on": 
{ + "macros": ["macro.dbt_profiler.default__information_schema"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2662094, + "supported_languages": null + }, + "macro.dbt_profiler.default__information_schema": { + "name": "default__information_schema", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.default__information_schema", + "macro_sql": "{%- macro default__information_schema(relation) -%}\n {{ relation.information_schema() }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.information_schema"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2663915, + "supported_languages": null + }, + "macro.dbt_profiler.bigquery__information_schema": { + "name": "bigquery__information_schema", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.bigquery__information_schema", + "macro_sql": "{%- macro bigquery__information_schema(relation) -%}\n {{ adapter.quote(relation.database) }}.{{ adapter.quote(relation.schema) }}.INFORMATION_SCHEMA\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2666745, + "supported_languages": null + }, + "macro.dbt_profiler.select_from_information_schema_columns": { + "name": "select_from_information_schema_columns", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.select_from_information_schema_columns", + "macro_sql": "\n\n{%- macro select_from_information_schema_columns(relation) -%}\n {{ return(adapter.dispatch(\"select_from_information_schema_columns\", macro_namespace=\"dbt_profiler\")(relation)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.default__select_from_information_schema_columns" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2669525, + "supported_languages": null + }, + "macro.dbt_profiler.default__select_from_information_schema_columns": { + "name": "default__select_from_information_schema_columns", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.default__select_from_information_schema_columns", + "macro_sql": "{%- macro default__select_from_information_schema_columns(relation) -%}\n select\n *\n from {{ dbt_profiler.information_schema(relation) }}.COLUMNS\n where lower(table_schema) = lower('{{ relation.schema }}') \n and lower(table_name) = lower('{{ relation.identifier }}')\n order by ordinal_position asc\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.information_schema"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + 
"created_at": 1711458072.267242, + "supported_languages": null + }, + "macro.dbt_profiler.redshift__select_from_information_schema_columns": { + "name": "redshift__select_from_information_schema_columns", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/cross_db_utils.sql", + "original_file_path": "macros/cross_db_utils.sql", + "unique_id": "macro.dbt_profiler.redshift__select_from_information_schema_columns", + "macro_sql": "{%- macro redshift__select_from_information_schema_columns(relation) -%}\n select\n attr.attname::varchar as column_name,\n type.typname::varchar as data_type,\n class.relname::varchar as table_name,\n namespace.nspname::varchar as table_schema\n from pg_catalog.pg_attribute as attr\n join pg_catalog.pg_type as type on (attr.atttypid = type.oid)\n join pg_catalog.pg_class as class on (attr.attrelid = class.oid)\n join pg_catalog.pg_namespace as namespace on (class.relnamespace = namespace.oid)\n where lower(table_schema) = lower('{{ relation.schema }}') \n and lower(table_name) = lower('{{ relation.identifier }}')\n and attr.attnum > 0\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2675073, + "supported_languages": null + }, + "macro.dbt_profiler.get_relation": { + "name": "get_relation", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/relation.sql", + "original_file_path": "macros/relation.sql", + "unique_id": "macro.dbt_profiler.get_relation", + "macro_sql": "{% macro get_relation(relation=none, relation_name=none, schema=none, database=none) %}\n\n{% if relation is none and relation_name is none %}\n {{ exceptions.raise_compiler_error(\"Either relation or relation_name must be specified.\") }}\n{% endif %}\n\n{% if relation is none %}\n {% if schema is none %}\n {% set schema = target.schema %}\n {% endif %}\n\n {% if database is none %}\n {% set database = target.database %}\n {% endif %}\n\n {{ log(\"Get relation %s (database=%s, schema=%s)\" | format(adapter.quote(relation_name), adapter.quote(database), adapter.quote(schema)), info=False) }}\n\n {%- \n set relation = adapter.get_relation(\n database=database,\n schema=schema,\n identifier=relation_name\n ) \n -%}\n {% if relation is none %}\n {{ exceptions.raise_compiler_error(\"Relation \" ~ adapter.quote(relation_name) ~ \" does not exist or not authorized.\") }}\n {% endif %}\n{% endif %}\n\n{% do return(relation) %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2698512, + "supported_languages": null + }, + "macro.dbt_profiler.assert_relation_exists": { + "name": "assert_relation_exists", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/relation.sql", + "original_file_path": "macros/relation.sql", + "unique_id": "macro.dbt_profiler.assert_relation_exists", + "macro_sql": "\n\n{%- macro assert_relation_exists(relation) -%}\n {{ return(adapter.dispatch(\"assert_relation_exists\", macro_namespace=\"dbt_profiler\")(relation)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__assert_relation_exists"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 
1711458072.2702024, + "supported_languages": null + }, + "macro.dbt_profiler.default__assert_relation_exists": { + "name": "default__assert_relation_exists", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/relation.sql", + "original_file_path": "macros/relation.sql", + "unique_id": "macro.dbt_profiler.default__assert_relation_exists", + "macro_sql": "{% macro default__assert_relation_exists(relation) %}\n\n{% do run_query(\"select * from \" ~ relation ~ \" limit 0\") %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2704377, + "supported_languages": null + }, + "macro.dbt_profiler.sqlserver__assert_relation_exists": { + "name": "sqlserver__assert_relation_exists", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/relation.sql", + "original_file_path": "macros/relation.sql", + "unique_id": "macro.dbt_profiler.sqlserver__assert_relation_exists", + "macro_sql": "{% macro sqlserver__assert_relation_exists(relation) %}\n\n{% do run_query(\"select top(0) * from \" ~ relation ~ \"\") %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2706752, + "supported_languages": null + }, + "macro.dbt_profiler.print_profile_docs": { + "name": "print_profile_docs", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/print_profile_docs.sql", + "original_file_path": "macros/print_profile_docs.sql", + "unique_id": "macro.dbt_profiler.print_profile_docs", + "macro_sql": "{% macro print_profile_docs(relation=none, relation_name=none, docs_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], max_rows=none, max_columns=13, max_column_width=30, max_precision=none, where_clause=none) %}\n\n{% if execute %}\n\n {%- set results = dbt_profiler.get_profile_table(relation=relation, relation_name=relation_name, schema=schema, database=database, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns, where_clause=where_clause) -%}\n \n {% if docs_name is none %}\n {% set docs_name = 'dbt_profiler__' + relation_name %}\n {% endif %}\n \n {%- set startdocs = '{% docs ' ~ docs_name ~ ' %}' -%}\n {%- set enddocs = '{% enddocs %}' -%}\n \n {# Check if macro is called in dbt Cloud? 
#}\n {%- if flags.WHICH == 'rpc' -%}\n {%- set is_dbt_cloud = true -%}\n {%- else -%}\n {%- set is_dbt_cloud = false -%}\n {%- endif -%}\n \n {% if not is_dbt_cloud %}\n \n {{ print(startdocs) }}\n {% do results.print_table(max_rows=max_rows, max_columns=max_columns, max_column_width=max_column_width, max_precision=max_precision) %}\n {{ print(enddocs) }}\n \n {% else %}\n \n {%- set profile_docs=[] -%}\n {% do profile_docs.append(startdocs) -%}\n {% do profile_docs.append('') %}\n \n {# Get header from column names #}\n {%- set headers = results.column_names -%}\n {%- set header = [] -%}\n {%- set horizontal_line = [] -%}\n \n {% for i in range(0,headers|length) %}\n {% do header.append(headers[i]) %}\n {% do horizontal_line.append('---') %}\n {% endfor %}\n {% do profile_docs.append('| ' ~ header|join(' | ') ~ ' |') %}\n {% do profile_docs.append('| ' ~ horizontal_line|join(' | ') ~ ' |') %}\n \n {# Get row values #}\n {% for row in results.rows %}\n {%- set list_row = [''] -%}\n {% for val in row.values() %}\n {% do list_row.append(val) %}\n {% endfor %}\n {% do profile_docs.append(list_row|join(' | ') ~ ' |') %}\n {% endfor %}\n {% do profile_docs.append('') %}\n {% do profile_docs.append(enddocs) %}\n \n {# Join profile docs #}\n {%- set joined = profile_docs | join ('\\n') -%}\n {{ log(joined, info=True) }}\n {% do return(joined) %}\n \n {% endif %}\n \n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_profiler.get_profile_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.2757168, + "supported_languages": null + }, + "macro.dbt_profiler.print_profile_schema": { + "name": "print_profile_schema", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/print_profile_schema.sql", + "original_file_path": "macros/print_profile_schema.sql", + "unique_id": "macro.dbt_profiler.print_profile_schema", + "macro_sql": "{% macro print_profile_schema(relation=none, relation_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], model_description=\"\", column_description=\"\", where_clause=none) %}\n\n{%- set column_dicts = [] -%}\n{%- set results = dbt_profiler.get_profile_table(relation=relation, relation_name=relation_name, schema=schema, database=database, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns, where_clause=where_clause) -%}\n\n{% if execute %}\n {% for row in results.rows %}\n\n {% set row_dict = row.dict() %}\n {% set column_name = row_dict.pop(\"column_name\") %}\n\n {% set meta_dict = {} %}\n {% for key, value in row_dict.items() %}\n {% set column = results.columns.get(key) %}\n {% do meta_dict.update({key: column.data_type.jsonify(value)}) %}\n {% endfor %}\n\n {% set column_dict = {\"name\": column_name, \"description\": column_description, \"meta\": meta_dict} %}\n {% do column_dicts.append(column_dict) %}\n {% endfor %}\n\n {% set schema_dict = {\n \"version\": 2,\n \"models\": [\n {\n \"name\": relation_name,\n \"description\": model_description,\n \"columns\": column_dicts\n }\n ]\n } %}\n {% set schema_yaml = toyaml(schema_dict) %}\n\n {{ log(schema_yaml, info=True) }}\n {% do return(schema_dict) %}\n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_profiler.get_profile_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": 
null, + "arguments": [], + "created_at": 1711458072.2787693, + "supported_languages": null + }, + "macro.dbt_profiler.get_profile_table": { + "name": "get_profile_table", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/get_profile_table.sql", + "original_file_path": "macros/get_profile_table.sql", + "unique_id": "macro.dbt_profiler.get_profile_table", + "macro_sql": "{% macro get_profile_table(relation=none, relation_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], where_clause=none) %}\n\n{%- set relation = dbt_profiler.get_relation(\n relation=relation,\n relation_name=relation_name,\n schema=schema,\n database=database\n) -%}\n{%- set profile_sql = dbt_profiler.get_profile(relation=relation, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns, where_clause=where_clause) -%}\n{{ log(profile_sql, info=False) }}\n{% set results = run_query(profile_sql) %}\n{% set results = results.rename(results.column_names | map('lower')) %}\n{% do return(results) %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.get_relation", + "macro.dbt_profiler.get_profile", + "macro.dbt.run_query" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.280125, + "supported_languages": null + }, + "macro.dbt_profiler.get_profile": { + "name": "get_profile", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/get_profile.sql", + "original_file_path": "macros/get_profile.sql", + "unique_id": "macro.dbt_profiler.get_profile", + "macro_sql": "{% macro get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[], where_clause=none, group_by=[]) %}\n {{ return(adapter.dispatch(\"get_profile\", macro_namespace=\"dbt_profiler\")(relation, exclude_measures, include_columns, exclude_columns, where_clause, group_by)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.my_nesso_project.default__get_profile"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.301033, + "supported_languages": null + }, + "macro.dbt_profiler.default__get_profile": { + "name": "default__get_profile", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/get_profile.sql", + "original_file_path": "macros/get_profile.sql", + "unique_id": "macro.dbt_profiler.default__get_profile", + "macro_sql": "{% macro default__get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[], where_clause=none, group_by=[]) %}\n\n{%- if include_columns and exclude_columns -%}\n {{ exceptions.raise_compiler_error(\"Both include_columns and exclude_columns arguments were provided to the `get_profile` macro. 
Only one is allowed.\") }}\n{%- endif -%}\n\n{%- set all_measures = [\n \"row_count\",\n \"not_null_proportion\",\n \"distinct_proportion\",\n \"distinct_count\",\n \"is_unique\",\n \"min\",\n \"max\",\n \"avg\",\n \"median\",\n \"std_dev_population\",\n \"std_dev_sample\"\n] -%}\n\n{%- set include_measures = all_measures | reject(\"in\", exclude_measures) -%}\n\n{{ log(\"Include measures: \" ~ include_measures, info=False) }}\n\n{% if execute %}\n {% do dbt_profiler.assert_relation_exists(relation) %}\n\n {{ log(\"Get columns in relation %s\" | format(relation.include()), info=False) }}\n {%- set relation_columns = adapter.get_columns_in_relation(relation) -%}\n {%- set relation_column_names = relation_columns | map(attribute=\"name\") | list -%}\n {{ log(\"Relation columns: \" ~ relation_column_names | join(', '), info=False) }}\n\n {%- if include_columns -%}\n {%- set profile_column_names = relation_column_names | select(\"in\", include_columns) | list -%}\n {%- elif exclude_columns -%}\n {%- set profile_column_names = relation_column_names | reject(\"in\", exclude_columns) | list -%}\n {%- else -%}\n {%- set profile_column_names = relation_column_names -%}\n {%- endif -%}\n\n {{ log(\"Profile columns: \" ~ profile_column_names | join(', '), info=False) }}\n\n {% set information_schema_columns = run_query(dbt_profiler.select_from_information_schema_columns(relation)) %}\n {% set information_schema_columns = information_schema_columns.rename(information_schema_columns.column_names | map('lower')) %}\n {% set information_schema_data_types = information_schema_columns.columns['data_type'].values() | map('lower') | list %}\n {% set information_schema_column_names = information_schema_columns.columns['column_name'].values() | map('lower') | list %}\n {% set data_type_map = {} %}\n {% for column_name in information_schema_column_names %}\n {% do data_type_map.update({column_name: information_schema_data_types[loop.index-1]}) %}\n {% endfor %}\n {{ log(\"Column data types: \" ~ data_type_map, info=False) }}\n\n {% set profile_sql %}\n with source_data as (\n select\n *\n from {{ relation }}\n {% if where_clause %}\n where {{ where_clause }}\n {% endif %}\n ),\n\n column_profiles as (\n {% for column_name in profile_column_names %}\n {% set data_type = data_type_map.get(column_name.lower(), \"\") %}\n select\n {%- for group_by_column in group_by %}\n {{ group_by_column }},\n {%- endfor %}\n lower('{{ column_name }}') as column_name,\n nullif(lower('{{ data_type }}'), '') as data_type,\n {% if \"row_count\" not in exclude_measures -%}\n {{ dbt_profiler.measure_row_count(column_name, data_type) }} as row_count,\n {%- endif %}\n {% if \"not_null_proportion\" not in exclude_measures -%}\n {{ dbt_profiler.measure_not_null_proportion(column_name, data_type) }} as not_null_proportion,\n {%- endif %}\n {% if \"distinct_proportion\" not in exclude_measures -%}\n {{ dbt_profiler.measure_distinct_proportion(column_name, data_type) }} as distinct_proportion,\n {%- endif %}\n {% if \"distinct_count\" not in exclude_measures -%}\n {{ dbt_profiler.measure_distinct_count(column_name, data_type) }} as distinct_count,\n {%- endif %}\n {% if \"is_unique\" not in exclude_measures -%}\n {{ dbt_profiler.measure_is_unique(column_name, data_type) }} as is_unique,\n {%- endif %}\n {% if \"min\" not in exclude_measures -%}\n {{ dbt_profiler.measure_min(column_name, data_type) }} as min,\n {%- endif %}\n {% if \"max\" not in exclude_measures -%}\n {{ dbt_profiler.measure_max(column_name, data_type) }} as max,\n {%- endif 
%}\n {% if \"avg\" not in exclude_measures -%}\n {{ dbt_profiler.measure_avg(column_name, data_type) }} as avg,\n {%- endif %}\n {% if \"median\" not in exclude_measures -%}\n {{ dbt_profiler.measure_median(column_name, data_type) }} as median,\n {%- endif %}\n {% if \"std_dev_population\" not in exclude_measures -%}\n {{ dbt_profiler.measure_std_dev_population(column_name, data_type) }} as std_dev_population,\n {%- endif %}\n {% if \"std_dev_sample\" not in exclude_measures -%}\n {{ dbt_profiler.measure_std_dev_sample(column_name, data_type) }} as std_dev_sample,\n {%- endif %}\n cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at,\n {{ loop.index }} as _column_position\n from source_data\n {% if group_by %}\n group by {{ group_by | join(\", \") }}\n {% endif %}\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n\n select\n {%- for group_by_column in group_by %}\n {{ group_by_column }},\n {%- endfor %}\n column_name,\n data_type,\n {% for measure in include_measures %}\n {{ measure }},\n {% endfor %}\n profiled_at\n from column_profiles\n order by {% if group_by %}{{ group_by | join(\", \") }},{% endif %} _column_position asc\n {% endset %}\n\n {% do return(profile_sql) %}\n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.assert_relation_exists", + "macro.dbt.run_query", + "macro.dbt_profiler.select_from_information_schema_columns", + "macro.dbt_profiler.measure_row_count", + "macro.dbt_profiler.measure_not_null_proportion", + "macro.dbt_profiler.measure_distinct_proportion", + "macro.dbt_profiler.measure_distinct_count", + "macro.dbt_profiler.measure_is_unique", + "macro.dbt_profiler.measure_min", + "macro.dbt_profiler.measure_max", + "macro.dbt_profiler.measure_avg", + "macro.dbt_profiler.measure_median", + "macro.dbt_profiler.measure_std_dev_population", + "macro.dbt_profiler.measure_std_dev_sample", + "macro.dbt_profiler.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3607378, + "supported_languages": null + }, + "macro.dbt_profiler.databricks__get_profile": { + "name": "databricks__get_profile", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/get_profile.sql", + "original_file_path": "macros/get_profile.sql", + "unique_id": "macro.dbt_profiler.databricks__get_profile", + "macro_sql": "{% macro databricks__get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[], where_clause=none, group_by=[]) %}\n\n{%- if include_columns and exclude_columns -%}\n {{ exceptions.raise_compiler_error(\"Both include_columns and exclude_columns arguments were provided to the `get_profile` macro. 
Only one is allowed.\") }}\n{%- endif -%}\n\n{%- set all_measures = [\n \"row_count\",\n \"not_null_proportion\",\n \"distinct_proportion\",\n \"distinct_count\",\n \"is_unique\",\n \"min\",\n \"max\",\n \"avg\",\n \"median\",\n \"std_dev_population\",\n \"std_dev_sample\"\n] -%}\n\n{%- set include_measures = all_measures | reject(\"in\", exclude_measures) -%}\n\n{{ log(\"Include measures: \" ~ include_measures, info=False) }}\n\n{% if execute %}\n {% do dbt_profiler.assert_relation_exists(relation) %}\n\n {{ log(\"Get columns in relation %s\" | format(relation.include()), info=True) }}\n {%- set relation_columns = adapter.get_columns_in_relation(relation) -%}\n {%- set relation_column_names = relation_columns | map(attribute=\"name\") | list -%}\n {{ log(\"Relation columns: \" ~ relation_column_names | join(', '), info=False) }}\n\n {%- if include_columns -%}\n {%- set profile_column_names = relation_column_names | select(\"in\", include_columns) | list -%}\n {%- elif exclude_columns -%}\n {%- set profile_column_names = relation_column_names | reject(\"in\", exclude_columns) | list -%}\n {%- else -%}\n {%- set profile_column_names = relation_column_names -%}\n {%- endif -%}\n\n {{ log(\"Profile columns: \" ~ profile_column_names | join(', '), info=False) }}\n\n {# Get column metadata. #}\n {% call statement('table_metadata', fetch_result=True) -%}\n describe table extended {{ relation.schema }}.{{ relation.identifier }}\n {% endcall %}\n {% set columns_metadata = load_result('table_metadata').table %}\n {% set columns_metadata = columns_metadata.rename(columns_metadata.column_names | map('lower')) %}\n \n {% set data_types = columns_metadata.columns['data_type'].values() | map('lower') | list %}\n {% set column_names = columns_metadata.columns['col_name'].values() | map('lower') | list %}\n {% set data_type_map = {} %}\n {% for column_name in column_names %}\n {% do data_type_map.update({column_name: data_types[loop.index-1]}) %}\n {% endfor %}\n {{ log(\"Column data types: \" ~ data_type_map, info=False) }}\n\n {% set profile_sql %}\n with source_data as (\n select\n *\n from {{ relation }}\n {% if where_clause %}\n where {{ where_clause }}\n {% endif %}\n ),\n\n column_profiles as (\n {% for column_name in profile_column_names %}\n {% set data_type = data_type_map.get(column_name.lower(), \"\") %}\n select \n {%- for group_by_column in group_by %}\n {{ group_by_column }},\n {%- endfor %}\n lower('{{ column_name }}') as column_name,\n nullif(lower('{{ data_type }}'), '') as data_type,\n {% if \"row_count\" not in exclude_measures -%}\n {{ dbt_profiler.measure_row_count(column_name, data_type) }} as row_count,\n {%- endif %}\n {% if \"not_null_proportion\" not in exclude_measures -%}\n {{ dbt_profiler.measure_not_null_proportion(column_name, data_type) }} as not_null_proportion,\n {%- endif %}\n {% if \"distinct_proportion\" not in exclude_measures -%}\n {{ dbt_profiler.measure_distinct_proportion(column_name, data_type) }} as distinct_proportion,\n {%- endif %}\n {% if \"distinct_count\" not in exclude_measures -%}\n {{ dbt_profiler.measure_distinct_count(column_name, data_type) }} as distinct_count,\n {%- endif %}\n {% if \"is_unique\" not in exclude_measures -%}\n {{ dbt_profiler.measure_is_unique(column_name, data_type) }} as is_unique,\n {%- endif %}\n {% if \"min\" not in exclude_measures -%}\n {{ dbt_profiler.measure_min(column_name, data_type) }} as min,\n {%- endif %}\n {% if \"max\" not in exclude_measures -%}\n {{ dbt_profiler.measure_max(column_name, data_type) }} as max,\n 
{%- endif %}\n {% if \"avg\" not in exclude_measures -%}\n {{ dbt_profiler.measure_avg(column_name, data_type) }} as avg,\n {%- endif %}\n {% if \"median\" not in exclude_measures -%}\n {{ dbt_profiler.measure_median(column_name, data_type) }} as median,\n {%- endif %}\n {% if \"std_dev_population\" not in exclude_measures -%}\n {{ dbt_profiler.measure_std_dev_population(column_name, data_type) }} as std_dev_population,\n {%- endif %}\n {% if \"std_dev_sample\" not in exclude_measures -%}\n {{ dbt_profiler.measure_std_dev_sample(column_name, data_type) }} as std_dev_sample,\n {%- endif %}\n cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at,\n {{ loop.index }} as _column_position\n from source_data\n {% if group_by %}\n group by {{ group_by | join(\", \") }}\n {% endif %}\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n\n select\n {%- for group_by_column in group_by %}\n {{ group_by_column }},\n {%- endfor %}\n column_name,\n data_type,\n {% for measure in include_measures %}\n {{ measure }},\n {% endfor %}\n profiled_at\n from column_profiles\n order by {% if group_by %}{{ group_by | join(\", \") }},{% endif %} _column_position asc\n {% endset %}\n\n {# {{ print(profile_sql) }} #}\n\n {% do return(profile_sql) %}\n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.assert_relation_exists", + "macro.dbt.statement", + "macro.dbt_profiler.measure_row_count", + "macro.dbt_profiler.measure_not_null_proportion", + "macro.dbt_profiler.measure_distinct_proportion", + "macro.dbt_profiler.measure_distinct_count", + "macro.dbt_profiler.measure_is_unique", + "macro.dbt_profiler.measure_min", + "macro.dbt_profiler.measure_max", + "macro.dbt_profiler.measure_avg", + "macro.dbt_profiler.measure_median", + "macro.dbt_profiler.measure_std_dev_population", + "macro.dbt_profiler.measure_std_dev_sample", + "macro.dbt_profiler.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3681998, + "supported_languages": null + }, + "macro.dbt_profiler.sqlserver__get_profile": { + "name": "sqlserver__get_profile", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/get_profile.sql", + "original_file_path": "macros/get_profile.sql", + "unique_id": "macro.dbt_profiler.sqlserver__get_profile", + "macro_sql": "{% macro sqlserver__get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[], where_clause=none, group_by=[]) %}\n\n{%- if include_columns and exclude_columns -%}\n {{ exceptions.raise_compiler_error(\"Both include_columns and exclude_columns arguments were provided to the `get_profile` macro. 
Only one is allowed.\") }}\n{%- endif -%}\n\n{%- set all_measures = [\n \"row_count\",\n \"not_null_proportion\",\n \"distinct_proportion\",\n \"distinct_count\",\n \"is_unique\",\n \"min\",\n \"max\",\n \"avg\",\n \"median\",\n \"std_dev_population\",\n \"std_dev_sample\"\n] -%}\n\n{%- set include_measures = all_measures | reject(\"in\", exclude_measures) -%}\n\n{{ log(\"Include measures: \" ~ include_measures, info=False) }}\n\n{% if execute %}\n {% do dbt_profiler.assert_relation_exists(relation) %}\n\n {{ log(\"Get columns in relation %s\" | format(relation.include()), info=False) }}\n {%- set relation_columns = adapter.get_columns_in_relation(relation) -%}\n {%- set relation_column_names = relation_columns | map(attribute=\"name\") | list -%}\n {{ log(\"Relation columns: \" ~ relation_column_names | join(', '), info=False) }}\n\n {%- if include_columns -%}\n {%- set profile_column_names = relation_column_names | select(\"in\", include_columns) | list -%}\n {%- elif exclude_columns -%}\n {%- set profile_column_names = relation_column_names | reject(\"in\", exclude_columns) | list -%}\n {%- else -%}\n {%- set profile_column_names = relation_column_names -%}\n {%- endif -%}\n\n {{ log(\"Profile columns: \" ~ profile_column_names | join(', '), info=False) }}\n\n {% set information_schema_columns = run_query(dbt_profiler.select_from_information_schema_columns(relation)) %}\n {% set information_schema_columns = information_schema_columns.rename(information_schema_columns.column_names | map('lower')) %}\n {% set information_schema_data_types = information_schema_columns.columns['data_type'].values() | map('lower') | list %}\n {% set information_schema_column_names = information_schema_columns.columns['column_name'].values() | map('lower') | list %}\n {% set data_type_map = {} %}\n {% for column_name in information_schema_column_names %}\n {% do data_type_map.update({column_name: information_schema_data_types[loop.index-1]}) %}\n {% endfor %}\n {{ log(\"Column data types: \" ~ data_type_map, info=False) }}\n\n {% set profile_sql %}\n with source_data as (\n select\n *\n from {{ relation }}\n {% if where_clause %}\n where {{ where_clause }}\n {% endif %}\n ),\n\n column_profiles as (\n {% for column_name in profile_column_names %}\n {% set data_type = data_type_map.get(column_name.lower(), \"\") %}\n select \n {%- for group_by_column in group_by %}\n {{ group_by_column }},\n {%- endfor %}\n lower('{{ column_name }}') as column_name,\n nullif(lower('{{ data_type }}'), '') as data_type,\n {% if \"row_count\" not in exclude_measures -%}\n {{ dbt_profiler.measure_row_count(column_name, data_type) }} as row_count,\n {%- endif %}\n {% if \"not_null_proportion\" not in exclude_measures -%}\n {{ dbt_profiler.measure_not_null_proportion(column_name, data_type) }} as not_null_proportion,\n {%- endif %}\n {% if \"distinct_proportion\" not in exclude_measures -%}\n {{ dbt_profiler.measure_distinct_proportion(column_name, data_type) }} as distinct_proportion,\n {%- endif %}\n {% if \"distinct_count\" not in exclude_measures -%}\n {{ dbt_profiler.measure_distinct_count(column_name, data_type) }} as distinct_count,\n {%- endif %}\n {% if \"is_unique\" not in exclude_measures -%}\n {{ dbt_profiler.measure_is_unique(column_name, data_type) }} as is_unique,\n {%- endif %}\n {% if \"min\" not in exclude_measures -%}\n {{ dbt_profiler.measure_min(column_name, data_type) }} as min,\n {%- endif %}\n {% if \"max\" not in exclude_measures -%}\n {{ dbt_profiler.measure_max(column_name, data_type) }} as max,\n {%- 
endif %}\n {% if \"avg\" not in exclude_measures -%}\n {{ dbt_profiler.measure_avg(column_name, data_type) }} as avg,\n {%- endif %}\n {% if \"median\" not in exclude_measures -%}\n {{ dbt_profiler.measure_median(column_name, data_type) }} as median,\n {%- endif %}\n {% if \"std_dev_population\" not in exclude_measures -%}\n {{ dbt_profiler.measure_std_dev_population(column_name, data_type) }} as std_dev_population,\n {%- endif %}\n {% if \"std_dev_sample\" not in exclude_measures -%}\n {{ dbt_profiler.measure_std_dev_sample(column_name, data_type) }} as std_dev_sample,\n {%- endif %}\n cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at,\n {{ loop.index }} as _column_position\n from source_data\n {% if group_by %}\n group by {{ group_by | join(\", \") }}\n {% endif %}\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n\n select top 100 percent\n {%- for group_by_column in group_by %}\n {{ group_by_column }},\n {%- endfor %}\n column_name,\n data_type,\n {% for measure in include_measures %}\n {{ measure }},\n {% endfor %}\n profiled_at\n from column_profiles\n order by {% if group_by %}{{ group_by | join(\", \") }},{% endif %} _column_position asc\n {% endset %}\n\n {% do return(profile_sql) %}\n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.assert_relation_exists", + "macro.dbt.run_query", + "macro.dbt_profiler.select_from_information_schema_columns", + "macro.dbt_profiler.measure_row_count", + "macro.dbt_profiler.measure_not_null_proportion", + "macro.dbt_profiler.measure_distinct_proportion", + "macro.dbt_profiler.measure_distinct_count", + "macro.dbt_profiler.measure_is_unique", + "macro.dbt_profiler.measure_min", + "macro.dbt_profiler.measure_max", + "macro.dbt_profiler.measure_avg", + "macro.dbt_profiler.measure_median", + "macro.dbt_profiler.measure_std_dev_population", + "macro.dbt_profiler.measure_std_dev_sample", + "macro.dbt_profiler.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3756027, + "supported_languages": null + }, + "macro.dbt_profiler.measure_row_count": { + "name": "measure_row_count", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_row_count", + "macro_sql": "\n\n{%- macro measure_row_count(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_row_count\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_row_count"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.378875, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_row_count": { + "name": "default__measure_row_count", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_row_count", + "macro_sql": "{%- macro default__measure_row_count(column_name, data_type) -%}\ncast(count(*) as {{ dbt.type_numeric() }})\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": ["macro.dbt.type_numeric"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + 
"patch_path": null, + "arguments": [], + "created_at": 1711458072.379098, + "supported_languages": null + }, + "macro.dbt_profiler.measure_not_null_proportion": { + "name": "measure_not_null_proportion", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_not_null_proportion", + "macro_sql": "\n\n{%- macro measure_not_null_proportion(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_not_null_proportion\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_not_null_proportion"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3794582, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_not_null_proportion": { + "name": "default__measure_not_null_proportion", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_not_null_proportion", + "macro_sql": "{%- macro default__measure_not_null_proportion(column_name, data_type) -%}\nsum(case when {{ adapter.quote(column_name) }} is null then 0 else 1 end) / cast(count(*) as {{ dbt.type_numeric() }})\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": ["macro.dbt.type_numeric"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3797433, + "supported_languages": null + }, + "macro.dbt_profiler.measure_distinct_proportion": { + "name": "measure_distinct_proportion", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_distinct_proportion", + "macro_sql": "\n\n{%- macro measure_distinct_proportion(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_distinct_proportion\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_distinct_proportion"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.380061, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_distinct_proportion": { + "name": "default__measure_distinct_proportion", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_distinct_proportion", + "macro_sql": "{%- macro default__measure_distinct_proportion(column_name, data_type) -%}\n{%- if not dbt_profiler.is_struct_dtype(data_type) -%}\n count(distinct {{ adapter.quote(column_name) }}) / cast(count(*) as {{ dbt.type_numeric() }})\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3805218, + 
"supported_languages": null + }, + "macro.dbt_profiler.measure_distinct_count": { + "name": "measure_distinct_count", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_distinct_count", + "macro_sql": "\n\n{%- macro measure_distinct_count(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_distinct_count\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_distinct_count"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3808389, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_distinct_count": { + "name": "default__measure_distinct_count", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_distinct_count", + "macro_sql": "{%- macro default__measure_distinct_count(column_name, data_type) -%}\n{%- if not dbt_profiler.is_struct_dtype(data_type) -%}\n count(distinct {{ adapter.quote(column_name) }})\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.381293, + "supported_languages": null + }, + "macro.dbt_profiler.measure_is_unique": { + "name": "measure_is_unique", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_is_unique", + "macro_sql": "\n\n{%- macro measure_is_unique(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_is_unique\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_is_unique"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3816156, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_is_unique": { + "name": "default__measure_is_unique", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_is_unique", + "macro_sql": "{%- macro default__measure_is_unique(column_name, data_type) -%}\n{%- if not dbt_profiler.is_struct_dtype(data_type) -%}\n count(distinct {{ adapter.quote(column_name) }}) = count(*)\n{%- else -%}\n null\n{%- endif -%}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.is_struct_dtype"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3819625, + "supported_languages": null + }, + "macro.dbt_profiler.sqlserver__measure_is_unique": { + "name": "sqlserver__measure_is_unique", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + 
"original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.sqlserver__measure_is_unique", + "macro_sql": "{%- macro sqlserver__measure_is_unique(column_name, data_type) -%}\ncase when count(distinct {{ adapter.quote(column_name) }}) = count(*) then 1 else 0 end\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3821807, + "supported_languages": null + }, + "macro.dbt_profiler.measure_min": { + "name": "measure_min", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_min", + "macro_sql": "\n\n{%- macro measure_min(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_min\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_min"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3824944, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_min": { + "name": "default__measure_min", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_min", + "macro_sql": "{%- macro default__measure_min(column_name, data_type) -%}\n{%- if (dbt_profiler.is_numeric_dtype(data_type) or dbt_profiler.is_date_or_time_dtype(data_type)) and not dbt_profiler.is_struct_dtype(data_type) -%}\n cast(min({{ adapter.quote(column_name) }}) as {{ dbt_profiler.type_string() }})\n{%- else -%}\n cast(null as {{ dbt_profiler.type_string() }})\n{%- endif -%}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_date_or_time_dtype", + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt_profiler.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3831537, + "supported_languages": null + }, + "macro.dbt_profiler.measure_max": { + "name": "measure_max", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_max", + "macro_sql": "\n\n{%- macro measure_max(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_max\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_max"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3834803, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_max": { + "name": "default__measure_max", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_max", + "macro_sql": "{%- macro default__measure_max(column_name, data_type) -%}\n{%- if (dbt_profiler.is_numeric_dtype(data_type) or 
dbt_profiler.is_date_or_time_dtype(data_type)) and not dbt_profiler.is_struct_dtype(data_type) -%}\n cast(max({{ adapter.quote(column_name) }}) as {{ dbt_profiler.type_string() }})\n{%- else -%}\n cast(null as {{ dbt_profiler.type_string() }})\n{%- endif -%}\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_date_or_time_dtype", + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt_profiler.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3840673, + "supported_languages": null + }, + "macro.dbt_profiler.measure_avg": { + "name": "measure_avg", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_avg", + "macro_sql": "\n\n{%- macro measure_avg(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_avg\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_avg"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3844914, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_avg": { + "name": "default__measure_avg", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_avg", + "macro_sql": "{%- macro default__measure_avg(column_name, data_type) -%}\n\n{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}\n avg({{ adapter.quote(column_name) }})\n{%- elif dbt_profiler.is_logical_dtype(data_type) -%}\n avg(case when {{ adapter.quote(column_name) }} then 1 else 0 end)\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt_profiler.is_logical_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3851109, + "supported_languages": null + }, + "macro.dbt_profiler.redshift__measure_avg": { + "name": "redshift__measure_avg", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.redshift__measure_avg", + "macro_sql": "{%- macro redshift__measure_avg(column_name, data_type) -%}\n\n{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}\n avg({{ adapter.quote(column_name) }}::float)\n{%- elif dbt_profiler.is_logical_dtype(data_type) -%}\n avg(case when {{ adapter.quote(column_name) }} then 1.0 else 0.0 end)\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt_profiler.is_logical_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + 
"patch_path": null, + "arguments": [], + "created_at": 1711458072.385723, + "supported_languages": null + }, + "macro.dbt_profiler.measure_median": { + "name": "measure_median", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_median", + "macro_sql": "\n\n{%- macro measure_median(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_median\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_median"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3860383, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_median": { + "name": "default__measure_median", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_median", + "macro_sql": "{%- macro default__measure_median(column_name, data_type) -%}\n\n{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}\n median({{ adapter.quote(column_name) }})\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3865314, + "supported_languages": null + }, + "macro.dbt_profiler.bigquery__measure_median": { + "name": "bigquery__measure_median", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.bigquery__measure_median", + "macro_sql": "{%- macro bigquery__measure_median(column_name, data_type) -%}\n\n{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}\n APPROX_QUANTILES({{ adapter.quote(column_name) }}, 100)[OFFSET(50)]\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3870113, + "supported_languages": null + }, + "macro.dbt_profiler.postgres__measure_median": { + "name": "postgres__measure_median", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.postgres__measure_median", + "macro_sql": "{%- macro postgres__measure_median(column_name, data_type) -%}\n\n{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}\n percentile_cont(0.5) within group (order by {{ adapter.quote(column_name) }})\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + 
"macro.dbt_profiler.is_struct_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3875391, + "supported_languages": null + }, + "macro.dbt_profiler.sql_server__measure_median": { + "name": "sql_server__measure_median", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.sql_server__measure_median", + "macro_sql": "{%- macro sql_server__measure_median(column_name, data_type) -%}\n\n{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}\n percentile_cont({{ adapter.quote(column_name) }}, 0.5) over ()\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3880434, + "supported_languages": null + }, + "macro.dbt_profiler.measure_std_dev_population": { + "name": "measure_std_dev_population", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_std_dev_population", + "macro_sql": "\n\n{%- macro measure_std_dev_population(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_std_dev_population\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_std_dev_population"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3883843, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_std_dev_population": { + "name": "default__measure_std_dev_population", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_std_dev_population", + "macro_sql": "{%- macro default__measure_std_dev_population(column_name, data_type) -%}\n\n{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}\n stddev_pop({{ adapter.quote(column_name) }})\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3888483, + "supported_languages": null + }, + "macro.dbt_profiler.sqlserver__measure_std_dev_population": { + "name": "sqlserver__measure_std_dev_population", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.sqlserver__measure_std_dev_population", + "macro_sql": "{%- macro sqlserver__measure_std_dev_population(column_name, data_type) -%}\n\n{%- if 
dbt_profiler.is_numeric_dtype(data_type) -%}\n stdevp({{ adapter.quote(column_name) }})\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n\n{%- endmacro -%}\n\n\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3893209, + "supported_languages": null + }, + "macro.dbt_profiler.measure_std_dev_sample": { + "name": "measure_std_dev_sample", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.measure_std_dev_sample", + "macro_sql": "\n\n{%- macro measure_std_dev_sample(column_name, data_type) -%}\n {{ return(adapter.dispatch(\"measure_std_dev_sample\", macro_namespace=\"dbt_profiler\")(column_name, data_type)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_profiler.default__measure_std_dev_sample"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.389648, + "supported_languages": null + }, + "macro.dbt_profiler.default__measure_std_dev_sample": { + "name": "default__measure_std_dev_sample", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.default__measure_std_dev_sample", + "macro_sql": "{%- macro default__measure_std_dev_sample(column_name, data_type) -%}\n\n{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}\n stddev_samp({{ adapter.quote(column_name) }})\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_struct_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3901353, + "supported_languages": null + }, + "macro.dbt_profiler.sqlserver__measure_std_dev_sample": { + "name": "sqlserver__measure_std_dev_sample", + "resource_type": "macro", + "package_name": "dbt_profiler", + "path": "macros/measures.sql", + "original_file_path": "macros/measures.sql", + "unique_id": "macro.dbt_profiler.sqlserver__measure_std_dev_sample", + "macro_sql": "{%- macro sqlserver__measure_std_dev_sample(column_name, data_type) -%}\n\n{%- if dbt_profiler.is_numeric_dtype(data_type) -%}\n stdev({{ adapter.quote(column_name) }})\n{%- else -%}\n cast(null as {{ dbt.type_numeric() }})\n{%- endif -%}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt.type_numeric" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3905299, + "supported_languages": null + }, + "macro.dbt_utils.get_url_host": { + "name": "get_url_host", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/web/get_url_host.sql", + "original_file_path": "macros/web/get_url_host.sql", + "unique_id": "macro.dbt_utils.get_url_host", + "macro_sql": "{% macro get_url_host(field) -%}\n {{ 
return(adapter.dispatch('get_url_host', 'dbt_utils')(field)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_url_host"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3910437, + "supported_languages": null + }, + "macro.dbt_utils.default__get_url_host": { + "name": "default__get_url_host", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/web/get_url_host.sql", + "original_file_path": "macros/web/get_url_host.sql", + "unique_id": "macro.dbt_utils.default__get_url_host", + "macro_sql": "{% macro default__get_url_host(field) -%}\n\n{%- set parsed =\n dbt.split_part(\n dbt.split_part(\n dbt.replace(\n dbt.replace(\n dbt.replace(field, \"'android-app://'\", \"''\"\n ), \"'http://'\", \"''\"\n ), \"'https://'\", \"''\"\n ), \"'/'\", 1\n ), \"'?'\", 1\n )\n\n-%}\n\n\n {{ dbt.safe_cast(\n parsed,\n dbt.type_string()\n )}}\n\n{%- endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.split_part", + "macro.dbt.replace", + "macro.dbt.safe_cast", + "macro.dbt.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3917823, + "supported_languages": null + }, + "macro.dbt_utils.get_url_path": { + "name": "get_url_path", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/web/get_url_path.sql", + "original_file_path": "macros/web/get_url_path.sql", + "unique_id": "macro.dbt_utils.get_url_path", + "macro_sql": "{% macro get_url_path(field) -%}\n {{ return(adapter.dispatch('get_url_path', 'dbt_utils')(field)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_url_path"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3924031, + "supported_languages": null + }, + "macro.dbt_utils.default__get_url_path": { + "name": "default__get_url_path", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/web/get_url_path.sql", + "original_file_path": "macros/web/get_url_path.sql", + "unique_id": "macro.dbt_utils.default__get_url_path", + "macro_sql": "{% macro default__get_url_path(field) -%}\n\n {%- set stripped_url =\n dbt.replace(\n dbt.replace(field, \"'http://'\", \"''\"), \"'https://'\", \"''\")\n -%}\n\n {%- set first_slash_pos -%}\n coalesce(\n nullif({{ dbt.position(\"'/'\", stripped_url) }}, 0),\n {{ dbt.position(\"'?'\", stripped_url) }} - 1\n )\n {%- endset -%}\n\n {%- set parsed_path =\n dbt.split_part(\n dbt.right(\n stripped_url,\n dbt.length(stripped_url) ~ \"-\" ~ first_slash_pos\n ),\n \"'?'\", 1\n )\n -%}\n\n {{ dbt.safe_cast(\n parsed_path,\n dbt.type_string()\n )}}\n\n{%- endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.replace", + "macro.dbt.position", + "macro.dbt.split_part", + "macro.dbt.right", + "macro.dbt.length", + "macro.dbt.safe_cast", + "macro.dbt.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3934004, + "supported_languages": null + }, + "macro.dbt_utils.get_url_parameter": { + "name": "get_url_parameter", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/web/get_url_parameter.sql", + "original_file_path": 
"macros/web/get_url_parameter.sql", + "unique_id": "macro.dbt_utils.get_url_parameter", + "macro_sql": "{% macro get_url_parameter(field, url_parameter) -%}\n {{ return(adapter.dispatch('get_url_parameter', 'dbt_utils')(field, url_parameter)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_url_parameter"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3938522, + "supported_languages": null + }, + "macro.dbt_utils.default__get_url_parameter": { + "name": "default__get_url_parameter", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/web/get_url_parameter.sql", + "original_file_path": "macros/web/get_url_parameter.sql", + "unique_id": "macro.dbt_utils.default__get_url_parameter", + "macro_sql": "{% macro default__get_url_parameter(field, url_parameter) -%}\n\n{%- set formatted_url_parameter = \"'\" + url_parameter + \"='\" -%}\n\n{%- set split = dbt.split_part(dbt.split_part(field, formatted_url_parameter, 2), \"'&'\", 1) -%}\n\nnullif({{ split }},'')\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.split_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3943298, + "supported_languages": null + }, + "macro.dbt_utils.star": { + "name": "star", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/star.sql", + "original_file_path": "macros/sql/star.sql", + "unique_id": "macro.dbt_utils.star", + "macro_sql": "{% macro star(from, relation_alias=False, except=[], prefix='', suffix='', quote_identifiers=True) -%}\r\n {{ return(adapter.dispatch('star', 'dbt_utils')(from, relation_alias, except, prefix, suffix, quote_identifiers)) }}\r\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__star"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3957705, + "supported_languages": null + }, + "macro.dbt_utils.default__star": { + "name": "default__star", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/star.sql", + "original_file_path": "macros/sql/star.sql", + "unique_id": "macro.dbt_utils.default__star", + "macro_sql": "{% macro default__star(from, relation_alias=False, except=[], prefix='', suffix='', quote_identifiers=True) -%}\r\n {%- do dbt_utils._is_relation(from, 'star') -%}\r\n {%- do dbt_utils._is_ephemeral(from, 'star') -%}\r\n\r\n {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}\r\n {%- if not execute -%}\r\n {% do return('*') %}\r\n {%- endif -%}\r\n\r\n {% set cols = dbt_utils.get_filtered_columns_in_relation(from, except) %}\r\n\r\n {%- if cols|length <= 0 -%}\r\n {% if flags.WHICH == 'compile' %}\r\n {% set response %}\r\n*\r\n/* No columns were returned. Maybe the relation doesn't exist yet \r\nor all columns were excluded. This star is only output during \r\ndbt compile, and exists to keep SQLFluff happy. 
*/\r\n {% endset %}\r\n {% do return(response) %}\r\n {% else %}\r\n {% do return(\"/* no columns returned from star() macro */\") %}\r\n {% endif %}\r\n {%- else -%}\r\n {%- for col in cols %}\r\n {%- if relation_alias %}{{ relation_alias }}.{% else %}{%- endif -%}\r\n {%- if quote_identifiers -%}\r\n {{ adapter.quote(col)|trim }} {%- if prefix!='' or suffix!='' %} as {{ adapter.quote(prefix ~ col ~ suffix)|trim }} {%- endif -%}\r\n {%- else -%}\r\n {{ col|trim }} {%- if prefix!='' or suffix!='' %} as {{ (prefix ~ col ~ suffix)|trim }} {%- endif -%}\r\n {% endif %}\r\n {%- if not loop.last %},{{ '\\n ' }}{%- endif -%}\r\n {%- endfor -%}\r\n {% endif %}\r\n{%- endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_utils._is_relation", + "macro.dbt_utils._is_ephemeral", + "macro.dbt_utils.get_filtered_columns_in_relation" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3981001, + "supported_languages": null + }, + "macro.dbt_utils.safe_add": { + "name": "safe_add", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/safe_add.sql", + "original_file_path": "macros/sql/safe_add.sql", + "unique_id": "macro.dbt_utils.safe_add", + "macro_sql": "{%- macro safe_add(field_list) -%}\n {{ return(adapter.dispatch('safe_add', 'dbt_utils')(field_list)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__safe_add"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3986766, + "supported_languages": null + }, + "macro.dbt_utils.default__safe_add": { + "name": "default__safe_add", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/safe_add.sql", + "original_file_path": "macros/sql/safe_add.sql", + "unique_id": "macro.dbt_utils.default__safe_add", + "macro_sql": "\n\n{%- macro default__safe_add(field_list) -%}\n\n{%- if field_list is not iterable or field_list is string or field_list is mapping -%}\n\n{%- set error_message = '\nWarning: the `safe_add` macro now takes a single list argument instead of \\\nstring arguments. The {}.{} model triggered this warning. 
\\\n'.format(model.package_name, model.name) -%}\n\n{%- do exceptions.warn(error_message) -%}\n\n{%- endif -%}\n\n{% set fields = [] %}\n\n{%- for field in field_list -%}\n\n {% do fields.append(\"coalesce(\" ~ field ~ \", 0)\") %}\n\n{%- endfor -%}\n\n{{ fields|join(' +\\n ') }}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.3994849, + "supported_languages": null + }, + "macro.dbt_utils.pivot": { + "name": "pivot", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/pivot.sql", + "original_file_path": "macros/sql/pivot.sql", + "unique_id": "macro.dbt_utils.pivot", + "macro_sql": "{% macro pivot(column,\n values,\n alias=True,\n agg='sum',\n cmp='=',\n prefix='',\n suffix='',\n then_value=1,\n else_value=0,\n quote_identifiers=True,\n distinct=False) %}\n {{ return(adapter.dispatch('pivot', 'dbt_utils')(column, values, alias, agg, cmp, prefix, suffix, then_value, else_value, quote_identifiers, distinct)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__pivot"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4008021, + "supported_languages": null + }, + "macro.dbt_utils.default__pivot": { + "name": "default__pivot", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/pivot.sql", + "original_file_path": "macros/sql/pivot.sql", + "unique_id": "macro.dbt_utils.default__pivot", + "macro_sql": "{% macro default__pivot(column,\n values,\n alias=True,\n agg='sum',\n cmp='=',\n prefix='',\n suffix='',\n then_value=1,\n else_value=0,\n quote_identifiers=True,\n distinct=False) %}\n {% for value in values %}\n {{ agg }}(\n {% if distinct %} distinct {% endif %}\n case\n when {{ column }} {{ cmp }} '{{ dbt.escape_single_quotes(value) }}'\n then {{ then_value }}\n else {{ else_value }}\n end\n )\n {% if alias %}\n {% if quote_identifiers %}\n as {{ adapter.quote(prefix ~ value ~ suffix) }}\n {% else %}\n as {{ dbt_utils.slugify(prefix ~ value ~ suffix) }}\n {% endif %}\n {% endif %}\n {% if not loop.last %},{% endif %}\n {% endfor %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.escape_single_quotes", "macro.dbt_utils.slugify"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.402139, + "supported_languages": null + }, + "macro.dbt_utils.nullcheck_table": { + "name": "nullcheck_table", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/nullcheck_table.sql", + "original_file_path": "macros/sql/nullcheck_table.sql", + "unique_id": "macro.dbt_utils.nullcheck_table", + "macro_sql": "{% macro nullcheck_table(relation) %}\n {{ return(adapter.dispatch('nullcheck_table', 'dbt_utils')(relation)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__nullcheck_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4025846, + "supported_languages": null + }, + "macro.dbt_utils.default__nullcheck_table": { + "name": "default__nullcheck_table", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/nullcheck_table.sql", + 
"original_file_path": "macros/sql/nullcheck_table.sql", + "unique_id": "macro.dbt_utils.default__nullcheck_table", + "macro_sql": "{% macro default__nullcheck_table(relation) %}\n\n {%- do dbt_utils._is_relation(relation, 'nullcheck_table') -%}\n {%- do dbt_utils._is_ephemeral(relation, 'nullcheck_table') -%}\n {% set cols = adapter.get_columns_in_relation(relation) %}\n\n select {{ dbt_utils.nullcheck(cols) }}\n from {{relation}}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_utils._is_relation", + "macro.dbt_utils._is_ephemeral", + "macro.dbt_utils.nullcheck" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4031084, + "supported_languages": null + }, + "macro.dbt_utils.get_filtered_columns_in_relation": { + "name": "get_filtered_columns_in_relation", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_filtered_columns_in_relation.sql", + "original_file_path": "macros/sql/get_filtered_columns_in_relation.sql", + "unique_id": "macro.dbt_utils.get_filtered_columns_in_relation", + "macro_sql": "{% macro get_filtered_columns_in_relation(from, except=[]) -%}\n {{ return(adapter.dispatch('get_filtered_columns_in_relation', 'dbt_utils')(from, except)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_filtered_columns_in_relation"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.40381, + "supported_languages": null + }, + "macro.dbt_utils.default__get_filtered_columns_in_relation": { + "name": "default__get_filtered_columns_in_relation", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_filtered_columns_in_relation.sql", + "original_file_path": "macros/sql/get_filtered_columns_in_relation.sql", + "unique_id": "macro.dbt_utils.default__get_filtered_columns_in_relation", + "macro_sql": "{% macro default__get_filtered_columns_in_relation(from, except=[]) -%}\n {%- do dbt_utils._is_relation(from, 'get_filtered_columns_in_relation') -%}\n {%- do dbt_utils._is_ephemeral(from, 'get_filtered_columns_in_relation') -%}\n\n {# -- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. 
#}\n {%- if not execute -%}\n {{ return('') }}\n {% endif %}\n\n {%- set include_cols = [] %}\n {%- set cols = adapter.get_columns_in_relation(from) -%}\n {%- set except = except | map(\"lower\") | list %}\n {%- for col in cols -%}\n {%- if col.column|lower not in except -%}\n {% do include_cols.append(col.column) %}\n {%- endif %}\n {%- endfor %}\n\n {{ return(include_cols) }}\n\n{%- endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_utils._is_relation", + "macro.dbt_utils._is_ephemeral" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.405013, + "supported_languages": null + }, + "macro.dbt_utils.unpivot": { + "name": "unpivot", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/unpivot.sql", + "original_file_path": "macros/sql/unpivot.sql", + "unique_id": "macro.dbt_utils.unpivot", + "macro_sql": "{% macro unpivot(relation=none, cast_to='varchar', exclude=none, remove=none, field_name='field_name', value_name='value') -%}\n {{ return(adapter.dispatch('unpivot', 'dbt_utils')(relation, cast_to, exclude, remove, field_name, value_name)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__unpivot"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4065828, + "supported_languages": null + }, + "macro.dbt_utils.default__unpivot": { + "name": "default__unpivot", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/unpivot.sql", + "original_file_path": "macros/sql/unpivot.sql", + "unique_id": "macro.dbt_utils.default__unpivot", + "macro_sql": "{% macro default__unpivot(relation=none, cast_to='varchar', exclude=none, remove=none, field_name='field_name', value_name='value') -%}\n\n {% if not relation %}\n {{ exceptions.raise_compiler_error(\"Error: argument `relation` is required for `unpivot` macro.\") }}\n {% endif %}\n\n {%- set exclude = exclude if exclude is not none else [] %}\n {%- set remove = remove if remove is not none else [] %}\n\n {%- set include_cols = [] %}\n\n {%- set table_columns = {} %}\n\n {%- do table_columns.update({relation: []}) %}\n\n {%- do dbt_utils._is_relation(relation, 'unpivot') -%}\n {%- do dbt_utils._is_ephemeral(relation, 'unpivot') -%}\n {%- set cols = adapter.get_columns_in_relation(relation) %}\n\n {%- for col in cols -%}\n {%- if col.column.lower() not in remove|map('lower') and col.column.lower() not in exclude|map('lower') -%}\n {% do include_cols.append(col) %}\n {%- endif %}\n {%- endfor %}\n\n\n {%- for col in include_cols -%}\n select\n {%- for exclude_col in exclude %}\n {{ exclude_col }},\n {%- endfor %}\n\n cast('{{ col.column }}' as {{ dbt.type_string() }}) as {{ field_name }},\n cast( {% if col.data_type == 'boolean' %}\n {{ dbt.cast_bool_to_text(col.column) }}\n {% else %}\n {{ col.column }}\n {% endif %}\n as {{ cast_to }}) as {{ value_name }}\n\n from {{ relation }}\n\n {% if not loop.last -%}\n union all\n {% endif -%}\n {%- endfor -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_utils._is_relation", + "macro.dbt_utils._is_ephemeral", + "macro.dbt.type_string", + "macro.dbt.cast_bool_to_text" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4092047, + "supported_languages": null + }, + 
"macro.dbt_utils.union_relations": { + "name": "union_relations", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/union.sql", + "original_file_path": "macros/sql/union.sql", + "unique_id": "macro.dbt_utils.union_relations", + "macro_sql": "{%- macro union_relations(relations, column_override=none, include=[], exclude=[], source_column_name='_dbt_source_relation', where=none) -%}\n {{ return(adapter.dispatch('union_relations', 'dbt_utils')(relations, column_override, include, exclude, source_column_name, where)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__union_relations"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4124496, + "supported_languages": null + }, + "macro.dbt_utils.default__union_relations": { + "name": "default__union_relations", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/union.sql", + "original_file_path": "macros/sql/union.sql", + "unique_id": "macro.dbt_utils.default__union_relations", + "macro_sql": "\n\n{%- macro default__union_relations(relations, column_override=none, include=[], exclude=[], source_column_name='_dbt_source_relation', where=none) -%}\n\n {%- if exclude and include -%}\n {{ exceptions.raise_compiler_error(\"Both an exclude and include list were provided to the `union` macro. Only one is allowed\") }}\n {%- endif -%}\n\n {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. -#}\n {%- if not execute %}\n {{ return('') }}\n {% endif -%}\n\n {%- set column_override = column_override if column_override is not none else {} -%}\n\n {%- set relation_columns = {} -%}\n {%- set column_superset = {} -%}\n {%- set all_excludes = [] -%}\n {%- set all_includes = [] -%}\n\n {%- if exclude -%}\n {%- for exc in exclude -%}\n {%- do all_excludes.append(exc | lower) -%}\n {%- endfor -%}\n {%- endif -%}\n\n {%- if include -%}\n {%- for inc in include -%}\n {%- do all_includes.append(inc | lower) -%}\n {%- endfor -%}\n {%- endif -%}\n\n {%- for relation in relations -%}\n\n {%- do relation_columns.update({relation: []}) -%}\n\n {%- do dbt_utils._is_relation(relation, 'union_relations') -%}\n {%- do dbt_utils._is_ephemeral(relation, 'union_relations') -%}\n {%- set cols = adapter.get_columns_in_relation(relation) -%}\n {%- for col in cols -%}\n\n {#- If an exclude list was provided and the column is in the list, do nothing -#}\n {%- if exclude and col.column | lower in all_excludes -%}\n\n {#- If an include list was provided and the column is not in the list, do nothing -#}\n {%- elif include and col.column | lower not in all_includes -%}\n\n {#- Otherwise add the column to the column superset -#}\n {%- else -%}\n\n {#- update the list of columns in this relation -#}\n {%- do relation_columns[relation].append(col.column) -%}\n\n {%- if col.column in column_superset -%}\n\n {%- set stored = column_superset[col.column] -%}\n {%- if col.is_string() and stored.is_string() and col.string_size() > stored.string_size() -%}\n\n {%- do column_superset.update({col.column: col}) -%}\n\n {%- endif %}\n\n {%- else -%}\n\n {%- do column_superset.update({col.column: col}) -%}\n\n {%- endif -%}\n\n {%- endif -%}\n\n {%- endfor -%}\n {%- endfor -%}\n\n {%- set ordered_column_names = column_superset.keys() -%}\n {%- set dbt_command = flags.WHICH -%}\n\n\n {% if dbt_command in ['run', 'build'] %}\n {% if (include | length > 0 
or exclude | length > 0) and not column_superset.keys() %}\n {%- set relations_string -%}\n {%- for relation in relations -%}\n {{ relation.name }}\n {%- if not loop.last %}, {% endif -%}\n {%- endfor -%}\n {%- endset -%}\n\n {%- set error_message -%}\n There were no columns found to union for relations {{ relations_string }}\n {%- endset -%}\n\n {{ exceptions.raise_compiler_error(error_message) }}\n {%- endif -%}\n {%- endif -%}\n\n {%- for relation in relations %}\n\n (\n select\n\n {%- if source_column_name is not none %}\n cast({{ dbt.string_literal(relation) }} as {{ dbt.type_string() }}) as {{ source_column_name }},\n {%- endif %}\n\n {% for col_name in ordered_column_names -%}\n\n {%- set col = column_superset[col_name] %}\n {%- set col_type = column_override.get(col.column, col.data_type) %}\n {%- set col_name = adapter.quote(col_name) if col_name in relation_columns[relation] else 'null' %}\n cast({{ col_name }} as {{ col_type }}) as {{ col.quoted }} {% if not loop.last %},{% endif -%}\n\n {%- endfor %}\n\n from {{ relation }}\n\n {% if where -%}\n where {{ where }}\n {%- endif %}\n )\n\n {% if not loop.last -%}\n union all\n {% endif -%}\n\n {%- endfor -%}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt_utils._is_relation", + "macro.dbt_utils._is_ephemeral", + "macro.dbt.string_literal", + "macro.dbt.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4177322, + "supported_languages": null + }, + "macro.dbt_utils.nullcheck": { + "name": "nullcheck", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/nullcheck.sql", + "original_file_path": "macros/sql/nullcheck.sql", + "unique_id": "macro.dbt_utils.nullcheck", + "macro_sql": "{% macro nullcheck(cols) %}\n {{ return(adapter.dispatch('nullcheck', 'dbt_utils')(cols)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__nullcheck"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4182558, + "supported_languages": null + }, + "macro.dbt_utils.default__nullcheck": { + "name": "default__nullcheck", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/nullcheck.sql", + "original_file_path": "macros/sql/nullcheck.sql", + "unique_id": "macro.dbt_utils.default__nullcheck", + "macro_sql": "{% macro default__nullcheck(cols) %}\n{%- for col in cols %}\n\n {% if col.is_string() -%}\n\n nullif({{col.name}},'') as {{col.name}}\n\n {%- else -%}\n\n {{col.name}}\n\n {%- endif -%}\n\n{%- if not loop.last -%} , {%- endif -%}\n\n{%- endfor -%}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.418829, + "supported_languages": null + }, + "macro.dbt_utils.degrees_to_radians": { + "name": "degrees_to_radians", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/haversine_distance.sql", + "original_file_path": "macros/sql/haversine_distance.sql", + "unique_id": "macro.dbt_utils.degrees_to_radians", + "macro_sql": "{% macro degrees_to_radians(degrees) -%}\n acos(-1) * {{degrees}} / 180\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + 
"patch_path": null, + "arguments": [], + "created_at": 1711458072.42021, + "supported_languages": null + }, + "macro.dbt_utils.haversine_distance": { + "name": "haversine_distance", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/haversine_distance.sql", + "original_file_path": "macros/sql/haversine_distance.sql", + "unique_id": "macro.dbt_utils.haversine_distance", + "macro_sql": "{% macro haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%}\n {{ return(adapter.dispatch('haversine_distance', 'dbt_utils')(lat1,lon1,lat2,lon2,unit)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__haversine_distance"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4206603, + "supported_languages": null + }, + "macro.dbt_utils.default__haversine_distance": { + "name": "default__haversine_distance", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/haversine_distance.sql", + "original_file_path": "macros/sql/haversine_distance.sql", + "unique_id": "macro.dbt_utils.default__haversine_distance", + "macro_sql": "{% macro default__haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%}\n{%- if unit == 'mi' %}\n {% set conversion_rate = 1 %}\n{% elif unit == 'km' %}\n {% set conversion_rate = 1.60934 %}\n{% else %}\n {{ exceptions.raise_compiler_error(\"unit input must be one of 'mi' or 'km'. Got \" ~ unit) }}\n{% endif %}\n\n 2 * 3961 * asin(sqrt(power((sin(radians(({{ lat2 }} - {{ lat1 }}) / 2))), 2) +\n cos(radians({{lat1}})) * cos(radians({{lat2}})) *\n power((sin(radians(({{ lon2 }} - {{ lon1 }}) / 2))), 2))) * {{ conversion_rate }}\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4215434, + "supported_languages": null + }, + "macro.dbt_utils.bigquery__haversine_distance": { + "name": "bigquery__haversine_distance", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/haversine_distance.sql", + "original_file_path": "macros/sql/haversine_distance.sql", + "unique_id": "macro.dbt_utils.bigquery__haversine_distance", + "macro_sql": "{% macro bigquery__haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%}\n{% set radians_lat1 = dbt_utils.degrees_to_radians(lat1) %}\n{% set radians_lat2 = dbt_utils.degrees_to_radians(lat2) %}\n{% set radians_lon1 = dbt_utils.degrees_to_radians(lon1) %}\n{% set radians_lon2 = dbt_utils.degrees_to_radians(lon2) %}\n{%- if unit == 'mi' %}\n {% set conversion_rate = 1 %}\n{% elif unit == 'km' %}\n {% set conversion_rate = 1.60934 %}\n{% else %}\n {{ exceptions.raise_compiler_error(\"unit input must be one of 'mi' or 'km'. 
Got \" ~ unit) }}\n{% endif %}\n 2 * 3961 * asin(sqrt(power(sin(({{ radians_lat2 }} - {{ radians_lat1 }}) / 2), 2) +\n cos({{ radians_lat1 }}) * cos({{ radians_lat2 }}) *\n power(sin(({{ radians_lon2 }} - {{ radians_lon1 }}) / 2), 2))) * {{ conversion_rate }}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.degrees_to_radians"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4227855, + "supported_languages": null + }, + "macro.dbt_utils.get_powers_of_two": { + "name": "get_powers_of_two", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/generate_series.sql", + "original_file_path": "macros/sql/generate_series.sql", + "unique_id": "macro.dbt_utils.get_powers_of_two", + "macro_sql": "{% macro get_powers_of_two(upper_bound) %}\n {{ return(adapter.dispatch('get_powers_of_two', 'dbt_utils')(upper_bound)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_powers_of_two"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4238052, + "supported_languages": null + }, + "macro.dbt_utils.default__get_powers_of_two": { + "name": "default__get_powers_of_two", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/generate_series.sql", + "original_file_path": "macros/sql/generate_series.sql", + "unique_id": "macro.dbt_utils.default__get_powers_of_two", + "macro_sql": "{% macro default__get_powers_of_two(upper_bound) %}\n\n {% if upper_bound <= 0 %}\n {{ exceptions.raise_compiler_error(\"upper bound must be positive\") }}\n {% endif %}\n\n {% for _ in range(1, 100) %}\n {% if upper_bound <= 2 ** loop.index %}{{ return(loop.index) }}{% endif %}\n {% endfor %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4244285, + "supported_languages": null + }, + "macro.dbt_utils.generate_series": { + "name": "generate_series", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/generate_series.sql", + "original_file_path": "macros/sql/generate_series.sql", + "unique_id": "macro.dbt_utils.generate_series", + "macro_sql": "{% macro generate_series(upper_bound) %}\n {{ return(adapter.dispatch('generate_series', 'dbt_utils')(upper_bound)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__generate_series"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.424742, + "supported_languages": null + }, + "macro.dbt_utils.default__generate_series": { + "name": "default__generate_series", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/generate_series.sql", + "original_file_path": "macros/sql/generate_series.sql", + "unique_id": "macro.dbt_utils.default__generate_series", + "macro_sql": "{% macro default__generate_series(upper_bound) %}\n\n {% set n = dbt_utils.get_powers_of_two(upper_bound) %}\n\n with p as (\n select 0 as generated_number union all select 1\n ), unioned as (\n\n select\n\n {% for i in range(n) %}\n p{{i}}.generated_number * power(2, {{i}})\n {% if not loop.last %} + {% endif %}\n {% endfor %}\n + 1\n as 
generated_number\n\n from\n\n {% for i in range(n) %}\n p as p{{i}}\n {% if not loop.last %} cross join {% endif %}\n {% endfor %}\n\n )\n\n select *\n from unioned\n where generated_number <= {{upper_bound}}\n order by generated_number\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.get_powers_of_two"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.425584, + "supported_languages": null + }, + "macro.dbt_utils.get_relations_by_pattern": { + "name": "get_relations_by_pattern", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_relations_by_pattern.sql", + "original_file_path": "macros/sql/get_relations_by_pattern.sql", + "unique_id": "macro.dbt_utils.get_relations_by_pattern", + "macro_sql": "{% macro get_relations_by_pattern(schema_pattern, table_pattern, exclude='', database=target.database) %}\n {{ return(adapter.dispatch('get_relations_by_pattern', 'dbt_utils')(schema_pattern, table_pattern, exclude, database)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_relations_by_pattern"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4264586, + "supported_languages": null + }, + "macro.dbt_utils.default__get_relations_by_pattern": { + "name": "default__get_relations_by_pattern", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_relations_by_pattern.sql", + "original_file_path": "macros/sql/get_relations_by_pattern.sql", + "unique_id": "macro.dbt_utils.default__get_relations_by_pattern", + "macro_sql": "{% macro default__get_relations_by_pattern(schema_pattern, table_pattern, exclude='', database=target.database) %}\n\n {%- call statement('get_tables', fetch_result=True) %}\n\n {{ dbt_utils.get_tables_by_pattern_sql(schema_pattern, table_pattern, exclude, database) }}\n\n {%- endcall -%}\n\n {%- set table_list = load_result('get_tables') -%}\n\n {%- if table_list and table_list['table'] -%}\n {%- set tbl_relations = [] -%}\n {%- for row in table_list['table'] -%}\n {%- set tbl_relation = api.Relation.create(\n database=database,\n schema=row.table_schema,\n identifier=row.table_name,\n type=row.table_type\n ) -%}\n {%- do tbl_relations.append(tbl_relation) -%}\n {%- endfor -%}\n\n {{ return(tbl_relations) }}\n {%- else -%}\n {{ return([]) }}\n {%- endif -%}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.statement", + "macro.dbt_utils.get_tables_by_pattern_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.427812, + "supported_languages": null + }, + "macro.dbt_utils.surrogate_key": { + "name": "surrogate_key", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/surrogate_key.sql", + "original_file_path": "macros/sql/surrogate_key.sql", + "unique_id": "macro.dbt_utils.surrogate_key", + "macro_sql": "{%- macro surrogate_key(field_list) -%}\n {% set frustrating_jinja_feature = varargs %}\n {{ return(adapter.dispatch('surrogate_key', 'dbt_utils')(field_list, *varargs)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__surrogate_key"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + 
"arguments": [], + "created_at": 1711458072.4283848, + "supported_languages": null + }, + "macro.dbt_utils.default__surrogate_key": { + "name": "default__surrogate_key", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/surrogate_key.sql", + "original_file_path": "macros/sql/surrogate_key.sql", + "unique_id": "macro.dbt_utils.default__surrogate_key", + "macro_sql": "\n\n{%- macro default__surrogate_key(field_list) -%}\n\n{%- set error_message = '\nWarning: `dbt_utils.surrogate_key` has been replaced by \\\n`dbt_utils.generate_surrogate_key`. The new macro treats null values \\\ndifferently to empty strings. To restore the behaviour of the original \\\nmacro, add a global variable in dbt_project.yml called \\\n`surrogate_key_treat_nulls_as_empty_strings` to your \\\ndbt_project.yml file with a value of True. \\\nThe {}.{} model triggered this warning. \\\n'.format(model.package_name, model.name) -%}\n\n{%- do exceptions.raise_compiler_error(error_message) -%}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4287298, + "supported_languages": null + }, + "macro.dbt_utils.get_single_value": { + "name": "get_single_value", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_single_value.sql", + "original_file_path": "macros/sql/get_single_value.sql", + "unique_id": "macro.dbt_utils.get_single_value", + "macro_sql": "{% macro get_single_value(query, default=none) %}\n {{ return(adapter.dispatch('get_single_value', 'dbt_utils')(query, default)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_single_value"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.429374, + "supported_languages": null + }, + "macro.dbt_utils.default__get_single_value": { + "name": "default__get_single_value", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_single_value.sql", + "original_file_path": "macros/sql/get_single_value.sql", + "unique_id": "macro.dbt_utils.default__get_single_value", + "macro_sql": "{% macro default__get_single_value(query, default) %}\n\n{# This macro returns the (0, 0) record in a query, i.e. the first row of the first column #}\n\n {%- call statement('get_query_result', fetch_result=True, auto_begin=false) -%}\n\n {{ query }}\n\n {%- endcall -%}\n\n {%- if execute -%}\n\n {% set r = load_result('get_query_result').table.columns[0].values() %}\n {% if r | length == 0 %}\n {% do print('Query `' ~ query ~ '` returned no rows. 
Using the default value: ' ~ default) %}\n {% set sql_result = default %}\n {% else %}\n {% set sql_result = r[0] %}\n {% endif %}\n \n {%- else -%}\n \n {% set sql_result = default %}\n \n {%- endif -%}\n\n {% do return(sql_result) %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4306471, + "supported_languages": null + }, + "macro.dbt_utils.deduplicate": { + "name": "deduplicate", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/deduplicate.sql", + "original_file_path": "macros/sql/deduplicate.sql", + "unique_id": "macro.dbt_utils.deduplicate", + "macro_sql": "{%- macro deduplicate(relation, partition_by, order_by) -%}\n {{ return(adapter.dispatch('deduplicate', 'dbt_utils')(relation, partition_by, order_by)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__deduplicate"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4315383, + "supported_languages": null + }, + "macro.dbt_utils.default__deduplicate": { + "name": "default__deduplicate", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/deduplicate.sql", + "original_file_path": "macros/sql/deduplicate.sql", + "unique_id": "macro.dbt_utils.default__deduplicate", + "macro_sql": "\n\n{%- macro default__deduplicate(relation, partition_by, order_by) -%}\n\n with row_numbered as (\n select\n _inner.*,\n row_number() over (\n partition by {{ partition_by }}\n order by {{ order_by }}\n ) as rn\n from {{ relation }} as _inner\n )\n\n select\n distinct data.*\n from {{ relation }} as data\n {#\n -- Not all DBs will support natural joins but the ones that do include:\n -- Oracle, MySQL, SQLite, Redshift, Teradata, Materialize, Databricks\n -- Apache Spark, SingleStore, Vertica\n -- Those that do not appear to support natural joins include:\n -- SQLServer, Trino, Presto, Rockset, Athena\n #}\n natural join row_numbered\n where row_numbered.rn = 1\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4318714, + "supported_languages": null + }, + "macro.dbt_utils.redshift__deduplicate": { + "name": "redshift__deduplicate", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/deduplicate.sql", + "original_file_path": "macros/sql/deduplicate.sql", + "unique_id": "macro.dbt_utils.redshift__deduplicate", + "macro_sql": "{% macro redshift__deduplicate(relation, partition_by, order_by) -%}\n\n {{ return(dbt_utils.default__deduplicate(relation, partition_by, order_by=order_by)) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__deduplicate"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4321694, + "supported_languages": null + }, + "macro.dbt_utils.postgres__deduplicate": { + "name": "postgres__deduplicate", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/deduplicate.sql", + "original_file_path": "macros/sql/deduplicate.sql", + "unique_id": "macro.dbt_utils.postgres__deduplicate", + "macro_sql": "\n{%- 
macro postgres__deduplicate(relation, partition_by, order_by) -%}\n\n select\n distinct on ({{ partition_by }}) *\n from {{ relation }}\n order by {{ partition_by }}{{ ',' ~ order_by }}\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4324563, + "supported_languages": null + }, + "macro.dbt_utils.snowflake__deduplicate": { + "name": "snowflake__deduplicate", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/deduplicate.sql", + "original_file_path": "macros/sql/deduplicate.sql", + "unique_id": "macro.dbt_utils.snowflake__deduplicate", + "macro_sql": "\n{%- macro snowflake__deduplicate(relation, partition_by, order_by) -%}\n\n select *\n from {{ relation }}\n qualify\n row_number() over (\n partition by {{ partition_by }}\n order by {{ order_by }}\n ) = 1\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4327059, + "supported_languages": null + }, + "macro.dbt_utils.bigquery__deduplicate": { + "name": "bigquery__deduplicate", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/deduplicate.sql", + "original_file_path": "macros/sql/deduplicate.sql", + "unique_id": "macro.dbt_utils.bigquery__deduplicate", + "macro_sql": "\n{%- macro bigquery__deduplicate(relation, partition_by, order_by) -%}\n\n select unique.*\n from (\n select\n array_agg (\n original\n order by {{ order_by }}\n limit 1\n )[offset(0)] unique\n from {{ relation }} original\n group by {{ partition_by }}\n )\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4329596, + "supported_languages": null + }, + "macro.dbt_utils.safe_divide": { + "name": "safe_divide", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/safe_divide.sql", + "original_file_path": "macros/sql/safe_divide.sql", + "unique_id": "macro.dbt_utils.safe_divide", + "macro_sql": "{% macro safe_divide(numerator, denominator) -%}\n {{ return(adapter.dispatch('safe_divide', 'dbt_utils')(numerator, denominator)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__safe_divide"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4334013, + "supported_languages": null + }, + "macro.dbt_utils.default__safe_divide": { + "name": "default__safe_divide", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/safe_divide.sql", + "original_file_path": "macros/sql/safe_divide.sql", + "unique_id": "macro.dbt_utils.default__safe_divide", + "macro_sql": "{% macro default__safe_divide(numerator, denominator) %}\n ( {{ numerator }} ) / nullif( ( {{ denominator }} ), 0)\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4336119, + "supported_languages": null + }, + "macro.dbt_utils.get_intervals_between": { + "name": "get_intervals_between", + "resource_type": "macro", + 
"package_name": "dbt_utils", + "path": "macros/sql/date_spine.sql", + "original_file_path": "macros/sql/date_spine.sql", + "unique_id": "macro.dbt_utils.get_intervals_between", + "macro_sql": "{% macro get_intervals_between(start_date, end_date, datepart) -%}\n {{ return(adapter.dispatch('get_intervals_between', 'dbt_utils')(start_date, end_date, datepart)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_intervals_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4344387, + "supported_languages": null + }, + "macro.dbt_utils.default__get_intervals_between": { + "name": "default__get_intervals_between", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/date_spine.sql", + "original_file_path": "macros/sql/date_spine.sql", + "unique_id": "macro.dbt_utils.default__get_intervals_between", + "macro_sql": "{% macro default__get_intervals_between(start_date, end_date, datepart) -%}\n {%- call statement('get_intervals_between', fetch_result=True) %}\n\n select {{ dbt.datediff(start_date, end_date, datepart) }}\n\n {%- endcall -%}\n\n {%- set value_list = load_result('get_intervals_between') -%}\n\n {%- if value_list and value_list['data'] -%}\n {%- set values = value_list['data'] | map(attribute=0) | list %}\n {{ return(values[0]) }}\n {%- else -%}\n {{ return(1) }}\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement", "macro.dbt.datediff"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4353755, + "supported_languages": null + }, + "macro.dbt_utils.date_spine": { + "name": "date_spine", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/date_spine.sql", + "original_file_path": "macros/sql/date_spine.sql", + "unique_id": "macro.dbt_utils.date_spine", + "macro_sql": "{% macro date_spine(datepart, start_date, end_date) %}\n {{ return(adapter.dispatch('date_spine', 'dbt_utils')(datepart, start_date, end_date)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__date_spine"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4357202, + "supported_languages": null + }, + "macro.dbt_utils.default__date_spine": { + "name": "default__date_spine", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/date_spine.sql", + "original_file_path": "macros/sql/date_spine.sql", + "unique_id": "macro.dbt_utils.default__date_spine", + "macro_sql": "{% macro default__date_spine(datepart, start_date, end_date) %}\n\n\n{# call as follows:\n\ndate_spine(\n \"day\",\n \"to_date('01/01/2016', 'mm/dd/yyyy')\",\n \"dbt.dateadd(week, 1, current_date)\"\n) #}\n\n\nwith rawdata as (\n\n {{dbt_utils.generate_series(\n dbt_utils.get_intervals_between(start_date, end_date, datepart)\n )}}\n\n),\n\nall_periods as (\n\n select (\n {{\n dbt.dateadd(\n datepart,\n \"row_number() over (order by 1) - 1\",\n start_date\n )\n }}\n ) as date_{{datepart}}\n from rawdata\n\n),\n\nfiltered as (\n\n select *\n from all_periods\n where date_{{datepart}} <= {{ end_date }}\n\n)\n\nselect * from filtered\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_utils.generate_series", + 
"macro.dbt_utils.get_intervals_between", + "macro.dbt.dateadd" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4362435, + "supported_languages": null + }, + "macro.dbt_utils.get_tables_by_pattern_sql": { + "name": "get_tables_by_pattern_sql", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_tables_by_pattern_sql.sql", + "original_file_path": "macros/sql/get_tables_by_pattern_sql.sql", + "unique_id": "macro.dbt_utils.get_tables_by_pattern_sql", + "macro_sql": "{% macro get_tables_by_pattern_sql(schema_pattern, table_pattern, exclude='', database=target.database) %}\n {{ return(adapter.dispatch('get_tables_by_pattern_sql', 'dbt_utils')\n (schema_pattern, table_pattern, exclude, database)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_tables_by_pattern_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4382353, + "supported_languages": null + }, + "macro.dbt_utils.default__get_tables_by_pattern_sql": { + "name": "default__get_tables_by_pattern_sql", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_tables_by_pattern_sql.sql", + "original_file_path": "macros/sql/get_tables_by_pattern_sql.sql", + "unique_id": "macro.dbt_utils.default__get_tables_by_pattern_sql", + "macro_sql": "{% macro default__get_tables_by_pattern_sql(schema_pattern, table_pattern, exclude='', database=target.database) %}\n\n select distinct\n table_schema as {{ adapter.quote('table_schema') }},\n table_name as {{ adapter.quote('table_name') }},\n {{ dbt_utils.get_table_types_sql() }}\n from {{ database }}.information_schema.tables\n where table_schema ilike '{{ schema_pattern }}'\n and table_name ilike '{{ table_pattern }}'\n and table_name not ilike '{{ exclude }}'\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.get_table_types_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4389074, + "supported_languages": null + }, + "macro.dbt_utils.bigquery__get_tables_by_pattern_sql": { + "name": "bigquery__get_tables_by_pattern_sql", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_tables_by_pattern_sql.sql", + "original_file_path": "macros/sql/get_tables_by_pattern_sql.sql", + "unique_id": "macro.dbt_utils.bigquery__get_tables_by_pattern_sql", + "macro_sql": "{% macro bigquery__get_tables_by_pattern_sql(schema_pattern, table_pattern, exclude='', database=target.database) %}\n\n {% if '%' in schema_pattern %}\n {% set schemata=dbt_utils._bigquery__get_matching_schemata(schema_pattern, database) %}\n {% else %}\n {% set schemata=[schema_pattern] %}\n {% endif %}\n\n {% set sql %}\n {% for schema in schemata %}\n select distinct\n table_schema,\n table_name,\n {{ dbt_utils.get_table_types_sql() }}\n\n from {{ adapter.quote(database) }}.{{ schema }}.INFORMATION_SCHEMA.TABLES\n where lower(table_name) like lower ('{{ table_pattern }}')\n and lower(table_name) not like lower ('{{ exclude }}')\n\n {% if not loop.last %} union all {% endif %}\n\n {% endfor %}\n {% endset %}\n\n {{ return(sql) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_utils._bigquery__get_matching_schemata", + 
"macro.dbt_utils.get_table_types_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4403434, + "supported_languages": null + }, + "macro.dbt_utils._bigquery__get_matching_schemata": { + "name": "_bigquery__get_matching_schemata", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_tables_by_pattern_sql.sql", + "original_file_path": "macros/sql/get_tables_by_pattern_sql.sql", + "unique_id": "macro.dbt_utils._bigquery__get_matching_schemata", + "macro_sql": "{% macro _bigquery__get_matching_schemata(schema_pattern, database) %}\n {% if execute %}\n\n {% set sql %}\n select schema_name from {{ adapter.quote(database) }}.INFORMATION_SCHEMA.SCHEMATA\n where lower(schema_name) like lower('{{ schema_pattern }}')\n {% endset %}\n\n {% set results=run_query(sql) %}\n\n {% set schemata=results.columns['schema_name'].values() %}\n\n {{ return(schemata) }}\n\n {% else %}\n\n {{ return([]) }}\n\n {% endif %}\n\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.run_query"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4411912, + "supported_languages": null + }, + "macro.dbt_utils.get_column_values": { + "name": "get_column_values", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_column_values.sql", + "original_file_path": "macros/sql/get_column_values.sql", + "unique_id": "macro.dbt_utils.get_column_values", + "macro_sql": "{% macro get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none, where=none) -%}\n {{ return(adapter.dispatch('get_column_values', 'dbt_utils')(table, column, order_by, max_records, default, where)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_column_values"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4425943, + "supported_languages": null + }, + "macro.dbt_utils.default__get_column_values": { + "name": "default__get_column_values", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_column_values.sql", + "original_file_path": "macros/sql/get_column_values.sql", + "unique_id": "macro.dbt_utils.default__get_column_values", + "macro_sql": "{% macro default__get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none, where=none) -%}\n {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}\n {%- if not execute -%}\n {% set default = [] if not default %}\n {{ return(default) }}\n {% endif %}\n\n {%- do dbt_utils._is_ephemeral(table, 'get_column_values') -%}\n\n {# Not all relations are tables. 
Renaming for internal clarity without breaking functionality for anyone using named arguments #}\n {# TODO: Change the method signature in a future 0.x.0 release #}\n {%- set target_relation = table -%}\n\n {# adapter.load_relation is a convenience wrapper to avoid building a Relation when we already have one #}\n {% set relation_exists = (load_relation(target_relation)) is not none %}\n\n {%- call statement('get_column_values', fetch_result=true) %}\n\n {%- if not relation_exists and default is none -%}\n\n {{ exceptions.raise_compiler_error(\"In get_column_values(): relation \" ~ target_relation ~ \" does not exist and no default value was provided.\") }}\n\n {%- elif not relation_exists and default is not none -%}\n\n {{ log(\"Relation \" ~ target_relation ~ \" does not exist. Returning the default value: \" ~ default) }}\n\n {{ return(default) }}\n\n {%- else -%}\n\n\n select\n {{ column }} as value\n\n from {{ target_relation }}\n\n {% if where is not none %}\n where {{ where }}\n {% endif %}\n\n group by {{ column }}\n order by {{ order_by }}\n\n {% if max_records is not none %}\n limit {{ max_records }}\n {% endif %}\n\n {% endif %}\n\n {%- endcall -%}\n\n {%- set value_list = load_result('get_column_values') -%}\n\n {%- if value_list and value_list['data'] -%}\n {%- set values = value_list['data'] | map(attribute=0) | list %}\n {{ return(values) }}\n {%- else -%}\n {{ return(default) }}\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_utils._is_ephemeral", + "macro.dbt.load_relation", + "macro.dbt.statement" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4449048, + "supported_languages": null + }, + "macro.dbt_utils.group_by": { + "name": "group_by", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/groupby.sql", + "original_file_path": "macros/sql/groupby.sql", + "unique_id": "macro.dbt_utils.group_by", + "macro_sql": "{%- macro group_by(n) -%}\n {{ return(adapter.dispatch('group_by', 'dbt_utils')(n)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__group_by"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4453251, + "supported_languages": null + }, + "macro.dbt_utils.default__group_by": { + "name": "default__group_by", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/groupby.sql", + "original_file_path": "macros/sql/groupby.sql", + "unique_id": "macro.dbt_utils.default__group_by", + "macro_sql": "\n\n{%- macro default__group_by(n) -%}\n\n group by {% for i in range(1, n + 1) -%}\n {{ i }}{{ ',' if not loop.last }} \n {%- endfor -%}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4457357, + "supported_languages": null + }, + "macro.dbt_utils.get_tables_by_prefix_sql": { + "name": "get_tables_by_prefix_sql", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_tables_by_prefix_sql.sql", + "original_file_path": "macros/sql/get_tables_by_prefix_sql.sql", + "unique_id": "macro.dbt_utils.get_tables_by_prefix_sql", + "macro_sql": "{% macro get_tables_by_prefix_sql(schema, prefix, exclude='', database=target.database) %}\n {{ 
return(adapter.dispatch('get_tables_by_prefix_sql', 'dbt_utils')(schema, prefix, exclude, database)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_tables_by_prefix_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4462929, + "supported_languages": null + }, + "macro.dbt_utils.default__get_tables_by_prefix_sql": { + "name": "default__get_tables_by_prefix_sql", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_tables_by_prefix_sql.sql", + "original_file_path": "macros/sql/get_tables_by_prefix_sql.sql", + "unique_id": "macro.dbt_utils.default__get_tables_by_prefix_sql", + "macro_sql": "{% macro default__get_tables_by_prefix_sql(schema, prefix, exclude='', database=target.database) %}\n\n {{ dbt_utils.get_tables_by_pattern_sql(\n schema_pattern = schema,\n table_pattern = prefix ~ '%',\n exclude = exclude,\n database = database\n ) }}\n \n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.get_tables_by_pattern_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.446711, + "supported_languages": null + }, + "macro.dbt_utils.get_query_results_as_dict": { + "name": "get_query_results_as_dict", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_query_results_as_dict.sql", + "original_file_path": "macros/sql/get_query_results_as_dict.sql", + "unique_id": "macro.dbt_utils.get_query_results_as_dict", + "macro_sql": "{% macro get_query_results_as_dict(query) %}\n {{ return(adapter.dispatch('get_query_results_as_dict', 'dbt_utils')(query)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_query_results_as_dict"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4472563, + "supported_languages": null + }, + "macro.dbt_utils.default__get_query_results_as_dict": { + "name": "default__get_query_results_as_dict", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_query_results_as_dict.sql", + "original_file_path": "macros/sql/get_query_results_as_dict.sql", + "unique_id": "macro.dbt_utils.default__get_query_results_as_dict", + "macro_sql": "{% macro default__get_query_results_as_dict(query) %}\n\n{# This macro returns a dictionary of the form {column_name: (tuple_of_results)} #}\n\n {%- call statement('get_query_results', fetch_result=True,auto_begin=false) -%}\n\n {{ query }}\n\n {%- endcall -%}\n\n {% set sql_results={} %}\n\n {%- if execute -%}\n {% set sql_results_table = load_result('get_query_results').table.columns %}\n {% for column_name, column in sql_results_table.items() %}\n {% do sql_results.update({column_name: column.values()}) %}\n {% endfor %}\n {%- endif -%}\n\n {{ return(sql_results) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4481847, + "supported_languages": null + }, + "macro.dbt_utils.width_bucket": { + "name": "width_bucket", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/width_bucket.sql", + "original_file_path": 
"macros/sql/width_bucket.sql", + "unique_id": "macro.dbt_utils.width_bucket", + "macro_sql": "{% macro width_bucket(expr, min_value, max_value, num_buckets) %}\n {{ return(adapter.dispatch('width_bucket', 'dbt_utils') (expr, min_value, max_value, num_buckets)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__width_bucket"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4491034, + "supported_languages": null + }, + "macro.dbt_utils.default__width_bucket": { + "name": "default__width_bucket", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/width_bucket.sql", + "original_file_path": "macros/sql/width_bucket.sql", + "unique_id": "macro.dbt_utils.default__width_bucket", + "macro_sql": "{% macro default__width_bucket(expr, min_value, max_value, num_buckets) -%}\n\n {% set bin_size -%}\n (( {{ max_value }} - {{ min_value }} ) / {{ num_buckets }} )\n {%- endset %}\n (\n -- to break ties when the amount is eaxtly at the bucket egde\n case\n when\n mod(\n {{ dbt.safe_cast(expr, dbt.type_numeric() ) }},\n {{ dbt.safe_cast(bin_size, dbt.type_numeric() ) }}\n ) = 0\n then 1\n else 0\n end\n ) +\n -- Anything over max_value goes the N+1 bucket\n least(\n ceil(\n ({{ expr }} - {{ min_value }})/{{ bin_size }}\n ),\n {{ num_buckets }} + 1\n )\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.safe_cast", "macro.dbt.type_numeric"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4497972, + "supported_languages": null + }, + "macro.dbt_utils.snowflake__width_bucket": { + "name": "snowflake__width_bucket", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/width_bucket.sql", + "original_file_path": "macros/sql/width_bucket.sql", + "unique_id": "macro.dbt_utils.snowflake__width_bucket", + "macro_sql": "{% macro snowflake__width_bucket(expr, min_value, max_value, num_buckets) %}\n width_bucket({{ expr }}, {{ min_value }}, {{ max_value }}, {{ num_buckets }} )\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.450101, + "supported_languages": null + }, + "macro.dbt_utils.get_table_types_sql": { + "name": "get_table_types_sql", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_table_types_sql.sql", + "original_file_path": "macros/sql/get_table_types_sql.sql", + "unique_id": "macro.dbt_utils.get_table_types_sql", + "macro_sql": "{%- macro get_table_types_sql() -%}\n {{ return(adapter.dispatch('get_table_types_sql', 'dbt_utils')()) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_table_types_sql"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4507492, + "supported_languages": null + }, + "macro.dbt_utils.default__get_table_types_sql": { + "name": "default__get_table_types_sql", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_table_types_sql.sql", + "original_file_path": "macros/sql/get_table_types_sql.sql", + "unique_id": "macro.dbt_utils.default__get_table_types_sql", + "macro_sql": "{% macro 
default__get_table_types_sql() %}\n case table_type\n when 'BASE TABLE' then 'table'\n when 'EXTERNAL TABLE' then 'external'\n when 'MATERIALIZED VIEW' then 'materializedview'\n else lower(table_type)\n end as {{ adapter.quote('table_type') }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4509726, + "supported_languages": null + }, + "macro.dbt_utils.postgres__get_table_types_sql": { + "name": "postgres__get_table_types_sql", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_table_types_sql.sql", + "original_file_path": "macros/sql/get_table_types_sql.sql", + "unique_id": "macro.dbt_utils.postgres__get_table_types_sql", + "macro_sql": "{% macro postgres__get_table_types_sql() %}\n case table_type\n when 'BASE TABLE' then 'table'\n when 'FOREIGN' then 'external'\n when 'MATERIALIZED VIEW' then 'materializedview'\n else lower(table_type)\n end as {{ adapter.quote('table_type') }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4511783, + "supported_languages": null + }, + "macro.dbt_utils.databricks__get_table_types_sql": { + "name": "databricks__get_table_types_sql", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_table_types_sql.sql", + "original_file_path": "macros/sql/get_table_types_sql.sql", + "unique_id": "macro.dbt_utils.databricks__get_table_types_sql", + "macro_sql": "{% macro databricks__get_table_types_sql() %}\n case table_type\n when 'MANAGED' then 'table'\n when 'BASE TABLE' then 'table'\n when 'MATERIALIZED VIEW' then 'materializedview'\n else lower(table_type)\n end as {{ adapter.quote('table_type') }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4513793, + "supported_languages": null + }, + "macro.dbt_utils.safe_subtract": { + "name": "safe_subtract", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/safe_subtract.sql", + "original_file_path": "macros/sql/safe_subtract.sql", + "unique_id": "macro.dbt_utils.safe_subtract", + "macro_sql": "{%- macro safe_subtract(field_list) -%}\n {{ return(adapter.dispatch('safe_subtract', 'dbt_utils')(field_list)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__safe_subtract"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.451885, + "supported_languages": null + }, + "macro.dbt_utils.default__safe_subtract": { + "name": "default__safe_subtract", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/safe_subtract.sql", + "original_file_path": "macros/sql/safe_subtract.sql", + "unique_id": "macro.dbt_utils.default__safe_subtract", + "macro_sql": "\n\n{%- macro default__safe_subtract(field_list) -%}\n\n{%- if field_list is not iterable or field_list is string or field_list is mapping -%}\n\n{%- set error_message = '\nWarning: the `safe_subtract` macro takes a single list argument instead of \\\nstring arguments. The {}.{} model triggered this warning. 
\\\n'.format(model.package_name, model.name) -%}\n\n{%- do exceptions.raise_compiler_error(error_message) -%}\n\n{%- endif -%}\n\n{% set fields = [] %}\n\n{%- for field in field_list -%}\n\n {% do fields.append(\"coalesce(\" ~ field ~ \", 0)\") %}\n\n{%- endfor -%}\n\n{{ fields|join(' -\\n ') }}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4528308, + "supported_languages": null + }, + "macro.dbt_utils.generate_surrogate_key": { + "name": "generate_surrogate_key", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/generate_surrogate_key.sql", + "original_file_path": "macros/sql/generate_surrogate_key.sql", + "unique_id": "macro.dbt_utils.generate_surrogate_key", + "macro_sql": "{%- macro generate_surrogate_key(field_list) -%}\n {{ return(adapter.dispatch('generate_surrogate_key', 'dbt_utils')(field_list)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__generate_surrogate_key"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4534998, + "supported_languages": null + }, + "macro.dbt_utils.default__generate_surrogate_key": { + "name": "default__generate_surrogate_key", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/generate_surrogate_key.sql", + "original_file_path": "macros/sql/generate_surrogate_key.sql", + "unique_id": "macro.dbt_utils.default__generate_surrogate_key", + "macro_sql": "\n\n{%- macro default__generate_surrogate_key(field_list) -%}\n\n{%- if var('surrogate_key_treat_nulls_as_empty_strings', False) -%}\n {%- set default_null_value = \"\" -%}\n{%- else -%}\n {%- set default_null_value = '_dbt_utils_surrogate_key_null_' -%}\n{%- endif -%}\n\n{%- set fields = [] -%}\n\n{%- for field in field_list -%}\n\n {%- do fields.append(\n \"coalesce(cast(\" ~ field ~ \" as \" ~ dbt.type_string() ~ \"), '\" ~ default_null_value ~\"')\"\n ) -%}\n\n {%- if not loop.last %}\n {%- do fields.append(\"'-'\") -%}\n {%- endif -%}\n\n{%- endfor -%}\n\n{{ dbt.hash(dbt.concat(fields)) }}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt.type_string", + "macro.dbt.hash", + "macro.dbt.concat" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4545825, + "supported_languages": null + }, + "macro.dbt_utils.get_relations_by_prefix": { + "name": "get_relations_by_prefix", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_relations_by_prefix.sql", + "original_file_path": "macros/sql/get_relations_by_prefix.sql", + "unique_id": "macro.dbt_utils.get_relations_by_prefix", + "macro_sql": "{% macro get_relations_by_prefix(schema, prefix, exclude='', database=target.database) %}\n {{ return(adapter.dispatch('get_relations_by_prefix', 'dbt_utils')(schema, prefix, exclude, database)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__get_relations_by_prefix"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4554498, + "supported_languages": null + }, + "macro.dbt_utils.default__get_relations_by_prefix": { + "name": 
"default__get_relations_by_prefix", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/sql/get_relations_by_prefix.sql", + "original_file_path": "macros/sql/get_relations_by_prefix.sql", + "unique_id": "macro.dbt_utils.default__get_relations_by_prefix", + "macro_sql": "{% macro default__get_relations_by_prefix(schema, prefix, exclude='', database=target.database) %}\n\n {%- call statement('get_tables', fetch_result=True) %}\n\n {{ dbt_utils.get_tables_by_prefix_sql(schema, prefix, exclude, database) }}\n\n {%- endcall -%}\n\n {%- set table_list = load_result('get_tables') -%}\n\n {%- if table_list and table_list['table'] -%}\n {%- set tbl_relations = [] -%}\n {%- for row in table_list['table'] -%}\n {%- set tbl_relation = api.Relation.create(\n database=database,\n schema=row.table_schema,\n identifier=row.table_name,\n type=row.table_type\n ) -%}\n {%- do tbl_relations.append(tbl_relation) -%}\n {%- endfor -%}\n\n {{ return(tbl_relations) }}\n {%- else -%}\n {{ return([]) }}\n {%- endif -%}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.statement", + "macro.dbt_utils.get_tables_by_prefix_sql" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.456764, + "supported_languages": null + }, + "macro.dbt_utils.pretty_log_format": { + "name": "pretty_log_format", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/jinja_helpers/pretty_log_format.sql", + "original_file_path": "macros/jinja_helpers/pretty_log_format.sql", + "unique_id": "macro.dbt_utils.pretty_log_format", + "macro_sql": "{% macro pretty_log_format(message) %}\n {{ return(adapter.dispatch('pretty_log_format', 'dbt_utils')(message)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__pretty_log_format"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4571507, + "supported_languages": null + }, + "macro.dbt_utils.default__pretty_log_format": { + "name": "default__pretty_log_format", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/jinja_helpers/pretty_log_format.sql", + "original_file_path": "macros/jinja_helpers/pretty_log_format.sql", + "unique_id": "macro.dbt_utils.default__pretty_log_format", + "macro_sql": "{% macro default__pretty_log_format(message) %}\n {{ return( dbt_utils.pretty_time() ~ ' + ' ~ message) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.pretty_time"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4574757, + "supported_languages": null + }, + "macro.dbt_utils._is_ephemeral": { + "name": "_is_ephemeral", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/jinja_helpers/_is_ephemeral.sql", + "original_file_path": "macros/jinja_helpers/_is_ephemeral.sql", + "unique_id": "macro.dbt_utils._is_ephemeral", + "macro_sql": "{% macro _is_ephemeral(obj, macro) %}\n {%- if obj.is_cte -%}\n {% set ephemeral_prefix = api.Relation.add_ephemeral_prefix('') %}\n {% if obj.name.startswith(ephemeral_prefix) %}\n {% set model_name = obj.name[(ephemeral_prefix|length):] %}\n {% else %}\n {% set model_name = obj.name %}\n {%- endif -%}\n {% set error_message %}\nThe `{{ macro }}` macro cannot be used with ephemeral models, as it 
relies on the information schema.\n\n`{{ model_name }}` is an ephemeral model. Consider making it a view or table instead.\n {% endset %}\n {%- do exceptions.raise_compiler_error(error_message) -%}\n {%- endif -%}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4586384, + "supported_languages": null + }, + "macro.dbt_utils.pretty_time": { + "name": "pretty_time", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/jinja_helpers/pretty_time.sql", + "original_file_path": "macros/jinja_helpers/pretty_time.sql", + "unique_id": "macro.dbt_utils.pretty_time", + "macro_sql": "{% macro pretty_time(format='%H:%M:%S') %}\n {{ return(adapter.dispatch('pretty_time', 'dbt_utils')(format)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__pretty_time"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.45904, + "supported_languages": null + }, + "macro.dbt_utils.default__pretty_time": { + "name": "default__pretty_time", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/jinja_helpers/pretty_time.sql", + "original_file_path": "macros/jinja_helpers/pretty_time.sql", + "unique_id": "macro.dbt_utils.default__pretty_time", + "macro_sql": "{% macro default__pretty_time(format='%H:%M:%S') %}\n {{ return(modules.datetime.datetime.now().strftime(format)) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4593544, + "supported_languages": null + }, + "macro.dbt_utils.slugify": { + "name": "slugify", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/jinja_helpers/slugify.sql", + "original_file_path": "macros/jinja_helpers/slugify.sql", + "unique_id": "macro.dbt_utils.slugify", + "macro_sql": "{% macro slugify(string) %}\n\n{#- Lower case the string -#}\n{% set string = string | lower %}\n{#- Replace spaces and dashes with underscores -#}\n{% set string = modules.re.sub('[ -]+', '_', string) %}\n{#- Only take letters, numbers, and underscores -#}\n{% set string = modules.re.sub('[^a-z0-9_]+', '', string) %}\n{#- Prepends \"_\" if string begins with a number -#}\n{% set string = modules.re.sub('^[0-9]', '_' + string[0], string) %}\n\n{{ return(string) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4602587, + "supported_languages": null + }, + "macro.dbt_utils._is_relation": { + "name": "_is_relation", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/jinja_helpers/_is_relation.sql", + "original_file_path": "macros/jinja_helpers/_is_relation.sql", + "unique_id": "macro.dbt_utils._is_relation", + "macro_sql": "{% macro _is_relation(obj, macro) %}\n {%- if not (obj is mapping and obj.get('metadata', {}).get('type', '').endswith('Relation')) -%}\n {%- do exceptions.raise_compiler_error(\"Macro \" ~ macro ~ \" expected a Relation but received the value: \" ~ obj) -%}\n {%- endif -%}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + 
"show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4608958, + "supported_languages": null + }, + "macro.dbt_utils.log_info": { + "name": "log_info", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/jinja_helpers/log_info.sql", + "original_file_path": "macros/jinja_helpers/log_info.sql", + "unique_id": "macro.dbt_utils.log_info", + "macro_sql": "{% macro log_info(message) %}\n {{ return(adapter.dispatch('log_info', 'dbt_utils')(message)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__log_info"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.461291, + "supported_languages": null + }, + "macro.dbt_utils.default__log_info": { + "name": "default__log_info", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/jinja_helpers/log_info.sql", + "original_file_path": "macros/jinja_helpers/log_info.sql", + "unique_id": "macro.dbt_utils.default__log_info", + "macro_sql": "{% macro default__log_info(message) %}\n {{ log(dbt_utils.pretty_log_format(message), info=True) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.pretty_log_format"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.461552, + "supported_languages": null + }, + "macro.dbt_utils.test_expression_is_true": { + "name": "test_expression_is_true", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/expression_is_true.sql", + "original_file_path": "macros/generic_tests/expression_is_true.sql", + "unique_id": "macro.dbt_utils.test_expression_is_true", + "macro_sql": "{% test expression_is_true(model, expression, column_name=None) %}\n {{ return(adapter.dispatch('test_expression_is_true', 'dbt_utils')(model, expression, column_name)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.462126, + "supported_languages": null + }, + "macro.dbt_utils.default__test_expression_is_true": { + "name": "default__test_expression_is_true", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/expression_is_true.sql", + "original_file_path": "macros/generic_tests/expression_is_true.sql", + "unique_id": "macro.dbt_utils.default__test_expression_is_true", + "macro_sql": "{% macro default__test_expression_is_true(model, expression, column_name) %}\n\n{% set column_list = '*' if should_store_failures() else \"1\" %}\n\nselect\n {{ column_list }}\nfrom {{ model }}\n{% if column_name is none %}\nwhere not({{ expression }})\n{%- else %}\nwhere not({{ column_name }} {{ expression }})\n{%- endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.should_store_failures"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4626715, + "supported_languages": null + }, + "macro.dbt_utils.test_not_accepted_values": { + "name": "test_not_accepted_values", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/not_accepted_values.sql", + 
"original_file_path": "macros/generic_tests/not_accepted_values.sql", + "unique_id": "macro.dbt_utils.test_not_accepted_values", + "macro_sql": "{% test not_accepted_values(model, column_name, values, quote=True) %}\n {{ return(adapter.dispatch('test_not_accepted_values', 'dbt_utils')(model, column_name, values, quote)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_not_accepted_values"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4633808, + "supported_languages": null + }, + "macro.dbt_utils.default__test_not_accepted_values": { + "name": "default__test_not_accepted_values", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/not_accepted_values.sql", + "original_file_path": "macros/generic_tests/not_accepted_values.sql", + "unique_id": "macro.dbt_utils.default__test_not_accepted_values", + "macro_sql": "{% macro default__test_not_accepted_values(model, column_name, values, quote=True) %}\nwith all_values as (\n\n select distinct\n {{ column_name }} as value_field\n\n from {{ model }}\n\n),\n\nvalidation_errors as (\n\n select\n value_field\n\n from all_values\n where value_field in (\n {% for value in values -%}\n {% if quote -%}\n '{{ value }}'\n {%- else -%}\n {{ value }}\n {%- endif -%}\n {%- if not loop.last -%},{%- endif %}\n {%- endfor %}\n )\n\n)\n\nselect *\nfrom validation_errors\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4640439, + "supported_languages": null + }, + "macro.dbt_utils.test_cardinality_equality": { + "name": "test_cardinality_equality", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/cardinality_equality.sql", + "original_file_path": "macros/generic_tests/cardinality_equality.sql", + "unique_id": "macro.dbt_utils.test_cardinality_equality", + "macro_sql": "{% test cardinality_equality(model, column_name, to, field) %}\n {{ return(adapter.dispatch('test_cardinality_equality', 'dbt_utils')(model, column_name, to, field)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_cardinality_equality"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4647448, + "supported_languages": null + }, + "macro.dbt_utils.default__test_cardinality_equality": { + "name": "default__test_cardinality_equality", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/cardinality_equality.sql", + "original_file_path": "macros/generic_tests/cardinality_equality.sql", + "unique_id": "macro.dbt_utils.default__test_cardinality_equality", + "macro_sql": "{% macro default__test_cardinality_equality(model, column_name, to, field) %}\n\n{# T-SQL does not let you use numbers as aliases for columns #}\n{# Thus, no \"GROUP BY 1\" #}\n\nwith table_a as (\nselect\n {{ column_name }},\n count(*) as num_rows\nfrom {{ model }}\ngroup by {{ column_name }}\n),\n\ntable_b as (\nselect\n {{ field }},\n count(*) as num_rows\nfrom {{ to }}\ngroup by {{ field }}\n),\n\nexcept_a as (\n select *\n from table_a\n {{ dbt.except() }}\n select *\n from table_b\n),\n\nexcept_b as (\n select *\n from table_b\n {{ dbt.except() }}\n select *\n from 
table_a\n),\n\nunioned as (\n select *\n from except_a\n union all\n select *\n from except_b\n)\n\nselect *\nfrom unioned\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.except"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4653292, + "supported_languages": null + }, + "macro.dbt_utils.test_sequential_values": { + "name": "test_sequential_values", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/sequential_values.sql", + "original_file_path": "macros/generic_tests/sequential_values.sql", + "unique_id": "macro.dbt_utils.test_sequential_values", + "macro_sql": "{% test sequential_values(model, column_name, interval=1, datepart=None, group_by_columns = []) %}\n\n {{ return(adapter.dispatch('test_sequential_values', 'dbt_utils')(model, column_name, interval, datepart, group_by_columns)) }}\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_sequential_values"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.466438, + "supported_languages": null + }, + "macro.dbt_utils.default__test_sequential_values": { + "name": "default__test_sequential_values", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/sequential_values.sql", + "original_file_path": "macros/generic_tests/sequential_values.sql", + "unique_id": "macro.dbt_utils.default__test_sequential_values", + "macro_sql": "{% macro default__test_sequential_values(model, column_name, interval=1, datepart=None, group_by_columns = []) %}\n\n{% set previous_column_name = \"previous_\" ~ dbt_utils.slugify(column_name) %}\n\n{% if group_by_columns|length() > 0 %}\n {% set select_gb_cols = group_by_columns|join(',') + ', ' %}\n {% set partition_gb_cols = 'partition by ' + group_by_columns|join(',') %}\n{% endif %}\n\nwith windowed as (\n\n select\n {{ select_gb_cols }}\n {{ column_name }},\n lag({{ column_name }}) over (\n {{partition_gb_cols}}\n order by {{ column_name }}\n ) as {{ previous_column_name }}\n from {{ model }}\n),\n\nvalidation_errors as (\n select\n *\n from windowed\n {% if datepart %}\n where not(cast({{ column_name }} as {{ dbt.type_timestamp() }})= cast({{ dbt.dateadd(datepart, interval, previous_column_name) }} as {{ dbt.type_timestamp() }}))\n {% else %}\n where not({{ column_name }} = {{ previous_column_name }} + {{ interval }})\n {% endif %}\n)\n\nselect *\nfrom validation_errors\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_utils.slugify", + "macro.dbt.type_timestamp", + "macro.dbt.dateadd" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4678504, + "supported_languages": null + }, + "macro.dbt_utils.test_not_null_proportion": { + "name": "test_not_null_proportion", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/not_null_proportion.sql", + "original_file_path": "macros/generic_tests/not_null_proportion.sql", + "unique_id": "macro.dbt_utils.test_not_null_proportion", + "macro_sql": "{% macro test_not_null_proportion(model, group_by_columns = []) %}\n {{ return(adapter.dispatch('test_not_null_proportion', 'dbt_utils')(model, group_by_columns, **kwargs)) }}\n{% endmacro %}", + "depends_on": { + "macros": 
["macro.dbt_utils.default__test_not_null_proportion"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4686918, + "supported_languages": null + }, + "macro.dbt_utils.default__test_not_null_proportion": { + "name": "default__test_not_null_proportion", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/not_null_proportion.sql", + "original_file_path": "macros/generic_tests/not_null_proportion.sql", + "unique_id": "macro.dbt_utils.default__test_not_null_proportion", + "macro_sql": "{% macro default__test_not_null_proportion(model, group_by_columns) %}\n\n{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}\n{% set at_least = kwargs.get('at_least', kwargs.get('arg')) %}\n{% set at_most = kwargs.get('at_most', kwargs.get('arg', 1)) %}\n\n{% if group_by_columns|length() > 0 %}\n {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %}\n {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}\n{% endif %}\n\nwith validation as (\n select\n {{select_gb_cols}}\n sum(case when {{ column_name }} is null then 0 else 1 end) / cast(count(*) as numeric) as not_null_proportion\n from {{ model }}\n {{groupby_gb_cols}}\n),\nvalidation_errors as (\n select\n {{select_gb_cols}}\n not_null_proportion\n from validation\n where not_null_proportion < {{ at_least }} or not_null_proportion > {{ at_most }}\n)\nselect\n *\nfrom validation_errors\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4699876, + "supported_languages": null + }, + "macro.dbt_utils.test_recency": { + "name": "test_recency", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/recency.sql", + "original_file_path": "macros/generic_tests/recency.sql", + "unique_id": "macro.dbt_utils.test_recency", + "macro_sql": "{% test recency(model, field, datepart, interval, ignore_time_component=False, group_by_columns = []) %}\n {{ return(adapter.dispatch('test_recency', 'dbt_utils')(model, field, datepart, interval, ignore_time_component, group_by_columns)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_recency"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4710057, + "supported_languages": null + }, + "macro.dbt_utils.default__test_recency": { + "name": "default__test_recency", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/recency.sql", + "original_file_path": "macros/generic_tests/recency.sql", + "unique_id": "macro.dbt_utils.default__test_recency", + "macro_sql": "{% macro default__test_recency(model, field, datepart, interval, ignore_time_component, group_by_columns) %}\n\n{% set threshold = 'cast(' ~ dbt.dateadd(datepart, interval * -1, dbt.current_timestamp()) ~ ' as ' ~ ('date' if ignore_time_component else dbt.type_timestamp()) ~ ')' %}\n\n{% if group_by_columns|length() > 0 %}\n {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %}\n {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}\n{% endif %}\n\n\nwith recency as (\n\n select \n\n {{ select_gb_cols }}\n {% if ignore_time_component %}\n cast(max({{ field }}) as date) as most_recent\n {%- else 
%}\n max({{ field }}) as most_recent\n {%- endif %}\n\n from {{ model }}\n\n {{ groupby_gb_cols }}\n\n)\n\nselect\n\n {{ select_gb_cols }}\n most_recent,\n {{ threshold }} as threshold\n\nfrom recency\nwhere most_recent < {{ threshold }}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.dateadd", + "macro.dbt.current_timestamp", + "macro.dbt.type_timestamp" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4723024, + "supported_languages": null + }, + "macro.dbt_utils.test_accepted_range": { + "name": "test_accepted_range", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/accepted_range.sql", + "original_file_path": "macros/generic_tests/accepted_range.sql", + "unique_id": "macro.dbt_utils.test_accepted_range", + "macro_sql": "{% test accepted_range(model, column_name, min_value=none, max_value=none, inclusive=true) %}\n {{ return(adapter.dispatch('test_accepted_range', 'dbt_utils')(model, column_name, min_value, max_value, inclusive)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_accepted_range"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4730878, + "supported_languages": null + }, + "macro.dbt_utils.default__test_accepted_range": { + "name": "default__test_accepted_range", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/accepted_range.sql", + "original_file_path": "macros/generic_tests/accepted_range.sql", + "unique_id": "macro.dbt_utils.default__test_accepted_range", + "macro_sql": "{% macro default__test_accepted_range(model, column_name, min_value=none, max_value=none, inclusive=true) %}\n\nwith meet_condition as(\n select *\n from {{ model }}\n),\n\nvalidation_errors as (\n select *\n from meet_condition\n where\n -- never true, defaults to an empty result set. Exists to ensure any combo of the `or` clauses below succeeds\n 1 = 2\n\n {%- if min_value is not none %}\n -- records with a value >= min_value are permitted. The `not` flips this to find records that don't meet the rule.\n or not {{ column_name }} > {{- \"=\" if inclusive }} {{ min_value }}\n {%- endif %}\n\n {%- if max_value is not none %}\n -- records with a value <= max_value are permitted. 
The `not` flips this to find records that don't meet the rule.\n or not {{ column_name }} < {{- \"=\" if inclusive }} {{ max_value }}\n {%- endif %}\n)\n\nselect *\nfrom validation_errors\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4738274, + "supported_languages": null + }, + "macro.dbt_utils.test_mutually_exclusive_ranges": { + "name": "test_mutually_exclusive_ranges", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/mutually_exclusive_ranges.sql", + "original_file_path": "macros/generic_tests/mutually_exclusive_ranges.sql", + "unique_id": "macro.dbt_utils.test_mutually_exclusive_ranges", + "macro_sql": "{% test mutually_exclusive_ranges(model, lower_bound_column, upper_bound_column, partition_by=None, gaps='allowed', zero_length_range_allowed=False) %}\n {{ return(adapter.dispatch('test_mutually_exclusive_ranges', 'dbt_utils')(model, lower_bound_column, upper_bound_column, partition_by, gaps, zero_length_range_allowed)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_mutually_exclusive_ranges"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4771938, + "supported_languages": null + }, + "macro.dbt_utils.default__test_mutually_exclusive_ranges": { + "name": "default__test_mutually_exclusive_ranges", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/mutually_exclusive_ranges.sql", + "original_file_path": "macros/generic_tests/mutually_exclusive_ranges.sql", + "unique_id": "macro.dbt_utils.default__test_mutually_exclusive_ranges", + "macro_sql": "{% macro default__test_mutually_exclusive_ranges(model, lower_bound_column, upper_bound_column, partition_by=None, gaps='allowed', zero_length_range_allowed=False) %}\n{% if gaps == 'not_allowed' %}\n {% set allow_gaps_operator='=' %}\n {% set allow_gaps_operator_in_words='equal_to' %}\n{% elif gaps == 'allowed' %}\n {% set allow_gaps_operator='<=' %}\n {% set allow_gaps_operator_in_words='less_than_or_equal_to' %}\n{% elif gaps == 'required' %}\n {% set allow_gaps_operator='<' %}\n {% set allow_gaps_operator_in_words='less_than' %}\n{% else %}\n {{ exceptions.raise_compiler_error(\n \"`gaps` argument for mutually_exclusive_ranges test must be one of ['not_allowed', 'allowed', 'required'] Got: '\" ~ gaps ~\"'.'\"\n ) }}\n{% endif %}\n{% if not zero_length_range_allowed %}\n {% set allow_zero_length_operator='<' %}\n {% set allow_zero_length_operator_in_words='less_than' %}\n{% elif zero_length_range_allowed %}\n {% set allow_zero_length_operator='<=' %}\n {% set allow_zero_length_operator_in_words='less_than_or_equal_to' %}\n{% else %}\n {{ exceptions.raise_compiler_error(\n \"`zero_length_range_allowed` argument for mutually_exclusive_ranges test must be one of [true, false] Got: '\" ~ zero_length_range_allowed ~\"'.'\"\n ) }}\n{% endif %}\n\n{% set partition_clause=\"partition by \" ~ partition_by if partition_by else '' %}\n\nwith window_functions as (\n\n select\n {% if partition_by %}\n {{ partition_by }} as partition_by_col,\n {% endif %}\n {{ lower_bound_column }} as lower_bound,\n {{ upper_bound_column }} as upper_bound,\n\n lead({{ lower_bound_column }}) over (\n {{ partition_clause }}\n order by {{ lower_bound_column }}, {{ upper_bound_column 
}}\n ) as next_lower_bound,\n\n row_number() over (\n {{ partition_clause }}\n order by {{ lower_bound_column }} desc, {{ upper_bound_column }} desc\n ) = 1 as is_last_record\n\n from {{ model }}\n\n),\n\ncalc as (\n -- We want to return records where one of our assumptions fails, so we'll use\n -- the `not` function with `and` statements so we can write our assumptions more cleanly\n select\n *,\n\n -- For each record: lower_bound should be < upper_bound.\n -- Coalesce it to return an error on the null case (implicit assumption\n -- these columns are not_null)\n coalesce(\n lower_bound {{ allow_zero_length_operator }} upper_bound,\n false\n ) as lower_bound_{{ allow_zero_length_operator_in_words }}_upper_bound,\n\n -- For each record: upper_bound {{ allow_gaps_operator }} the next lower_bound.\n -- Coalesce it to handle null cases for the last record.\n coalesce(\n upper_bound {{ allow_gaps_operator }} next_lower_bound,\n is_last_record,\n false\n ) as upper_bound_{{ allow_gaps_operator_in_words }}_next_lower_bound\n\n from window_functions\n\n),\n\nvalidation_errors as (\n\n select\n *\n from calc\n\n where not(\n -- THE FOLLOWING SHOULD BE TRUE --\n lower_bound_{{ allow_zero_length_operator_in_words }}_upper_bound\n and upper_bound_{{ allow_gaps_operator_in_words }}_next_lower_bound\n )\n)\n\nselect * from validation_errors\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4799728, + "supported_languages": null + }, + "macro.dbt_utils.test_equality": { + "name": "test_equality", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/equality.sql", + "original_file_path": "macros/generic_tests/equality.sql", + "unique_id": "macro.dbt_utils.test_equality", + "macro_sql": "{% test equality(model, compare_model, compare_columns=None) %}\n {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_equality"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.48103, + "supported_languages": null + }, + "macro.dbt_utils.default__test_equality": { + "name": "default__test_equality", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/equality.sql", + "original_file_path": "macros/generic_tests/equality.sql", + "unique_id": "macro.dbt_utils.default__test_equality", + "macro_sql": "{% macro default__test_equality(model, compare_model, compare_columns=None) %}\n\n{% set set_diff %}\n count(*) + coalesce(abs(\n sum(case when which_diff = 'a_minus_b' then 1 else 0 end) -\n sum(case when which_diff = 'b_minus_a' then 1 else 0 end)\n ), 0)\n{% endset %}\n\n{#-- Needs to be set at parse time, before we return '' below --#}\n{{ config(fail_calc = set_diff) }}\n\n{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. 
#}\n{%- if not execute -%}\n {{ return('') }}\n{% endif %}\n\n-- setup\n{%- do dbt_utils._is_relation(model, 'test_equality') -%}\n\n{#-\nIf the compare_cols arg is provided, we can run this test without querying the\ninformation schema\u00a0\u2014 this allows the model to be an ephemeral model\n-#}\n\n{%- if not compare_columns -%}\n {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}\n {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}\n{%- endif -%}\n\n{% set compare_cols_csv = compare_columns | join(', ') %}\n\nwith a as (\n\n select * from {{ model }}\n\n),\n\nb as (\n\n select * from {{ compare_model }}\n\n),\n\na_minus_b as (\n\n select {{compare_cols_csv}} from a\n {{ dbt.except() }}\n select {{compare_cols_csv}} from b\n\n),\n\nb_minus_a as (\n\n select {{compare_cols_csv}} from b\n {{ dbt.except() }}\n select {{compare_cols_csv}} from a\n\n),\n\nunioned as (\n\n select 'a_minus_b' as which_diff, a_minus_b.* from a_minus_b\n union all\n select 'b_minus_a' as which_diff, b_minus_a.* from b_minus_a\n\n)\n\nselect * from unioned\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_utils._is_relation", + "macro.dbt_utils._is_ephemeral", + "macro.dbt.except" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.482389, + "supported_languages": null + }, + "macro.dbt_utils.test_relationships_where": { + "name": "test_relationships_where", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/relationships_where.sql", + "original_file_path": "macros/generic_tests/relationships_where.sql", + "unique_id": "macro.dbt_utils.test_relationships_where", + "macro_sql": "{% test relationships_where(model, column_name, to, field, from_condition=\"1=1\", to_condition=\"1=1\") %}\n {{ return(adapter.dispatch('test_relationships_where', 'dbt_utils')(model, column_name, to, field, from_condition, to_condition)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_relationships_where"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4831986, + "supported_languages": null + }, + "macro.dbt_utils.default__test_relationships_where": { + "name": "default__test_relationships_where", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/relationships_where.sql", + "original_file_path": "macros/generic_tests/relationships_where.sql", + "unique_id": "macro.dbt_utils.default__test_relationships_where", + "macro_sql": "{% macro default__test_relationships_where(model, column_name, to, field, from_condition=\"1=1\", to_condition=\"1=1\") %}\n\n{# T-SQL has no boolean data type so we use 1=1 which returns TRUE #}\n{# ref https://stackoverflow.com/a/7170753/3842610 #}\n\nwith left_table as (\n\n select\n {{column_name}} as id\n\n from {{model}}\n\n where {{column_name}} is not null\n and {{from_condition}}\n\n),\n\nright_table as (\n\n select\n {{field}} as id\n\n from {{to}}\n\n where {{field}} is not null\n and {{to_condition}}\n\n),\n\nexceptions as (\n\n select\n left_table.id,\n right_table.id as right_id\n\n from left_table\n\n left join right_table\n on left_table.id = right_table.id\n\n where right_table.id is null\n\n)\n\nselect * from exceptions\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": 
"", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4837492, + "supported_languages": null + }, + "macro.dbt_utils.test_not_constant": { + "name": "test_not_constant", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/not_constant.sql", + "original_file_path": "macros/generic_tests/not_constant.sql", + "unique_id": "macro.dbt_utils.test_not_constant", + "macro_sql": "{% test not_constant(model, column_name, group_by_columns = []) %}\n {{ return(adapter.dispatch('test_not_constant', 'dbt_utils')(model, column_name, group_by_columns)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_not_constant"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4843416, + "supported_languages": null + }, + "macro.dbt_utils.default__test_not_constant": { + "name": "default__test_not_constant", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/not_constant.sql", + "original_file_path": "macros/generic_tests/not_constant.sql", + "unique_id": "macro.dbt_utils.default__test_not_constant", + "macro_sql": "{% macro default__test_not_constant(model, column_name, group_by_columns) %}\n\n{% if group_by_columns|length() > 0 %}\n {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %}\n {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}\n{% endif %}\n\n\nselect\n {# In TSQL, subquery aggregate columns need aliases #}\n {# thus: a filler col name, 'filler_column' #}\n {{select_gb_cols}}\n count(distinct {{ column_name }}) as filler_column\n\nfrom {{ model }}\n\n {{groupby_gb_cols}}\n\nhaving count(distinct {{ column_name }}) = 1\n\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4850676, + "supported_languages": null + }, + "macro.dbt_utils.test_at_least_one": { + "name": "test_at_least_one", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/at_least_one.sql", + "original_file_path": "macros/generic_tests/at_least_one.sql", + "unique_id": "macro.dbt_utils.test_at_least_one", + "macro_sql": "{% test at_least_one(model, column_name, group_by_columns = []) %}\n {{ return(adapter.dispatch('test_at_least_one', 'dbt_utils')(model, column_name, group_by_columns)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_at_least_one"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4859412, + "supported_languages": null + }, + "macro.dbt_utils.default__test_at_least_one": { + "name": "default__test_at_least_one", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/at_least_one.sql", + "original_file_path": "macros/generic_tests/at_least_one.sql", + "unique_id": "macro.dbt_utils.default__test_at_least_one", + "macro_sql": "{% macro default__test_at_least_one(model, column_name, group_by_columns) %}\n\n{% set pruned_cols = [column_name] %}\n\n{% if group_by_columns|length() > 0 %}\n\n {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %}\n {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}\n 
{% set pruned_cols = group_by_columns %}\n\n {% if column_name not in pruned_cols %}\n {% do pruned_cols.append(column_name) %}\n {% endif %}\n\n{% endif %}\n\n{% set select_pruned_cols = pruned_cols|join(' ,') %}\n\nselect *\nfrom (\n with pruned_rows as (\n select\n {{ select_pruned_cols }}\n from {{ model }}\n where {{ column_name }} is not null\n limit 1\n )\n select\n {# In TSQL, subquery aggregate columns need aliases #}\n {# thus: a filler col name, 'filler_column' #}\n {{select_gb_cols}}\n count({{ column_name }}) as filler_column\n\n from pruned_rows\n\n {{groupby_gb_cols}}\n\n having count({{ column_name }}) = 0\n\n) validation_errors\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4871364, + "supported_languages": null + }, + "macro.dbt_utils.test_unique_combination_of_columns": { + "name": "test_unique_combination_of_columns", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/unique_combination_of_columns.sql", + "original_file_path": "macros/generic_tests/unique_combination_of_columns.sql", + "unique_id": "macro.dbt_utils.test_unique_combination_of_columns", + "macro_sql": "{% test unique_combination_of_columns(model, combination_of_columns, quote_columns=false) %}\n {{ return(adapter.dispatch('test_unique_combination_of_columns', 'dbt_utils')(model, combination_of_columns, quote_columns)) }}\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_utils.default__test_unique_combination_of_columns" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.488035, + "supported_languages": null + }, + "macro.dbt_utils.default__test_unique_combination_of_columns": { + "name": "default__test_unique_combination_of_columns", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/unique_combination_of_columns.sql", + "original_file_path": "macros/generic_tests/unique_combination_of_columns.sql", + "unique_id": "macro.dbt_utils.default__test_unique_combination_of_columns", + "macro_sql": "{% macro default__test_unique_combination_of_columns(model, combination_of_columns, quote_columns=false) %}\n\n{% if not quote_columns %}\n {%- set column_list=combination_of_columns %}\n{% elif quote_columns %}\n {%- set column_list=[] %}\n {% for column in combination_of_columns -%}\n {% set column_list = column_list.append( adapter.quote(column) ) %}\n {%- endfor %}\n{% else %}\n {{ exceptions.raise_compiler_error(\n \"`quote_columns` argument for unique_combination_of_columns test must be one of [True, False] Got: '\" ~ quote ~\"'.'\"\n ) }}\n{% endif %}\n\n{%- set columns_csv=column_list | join(', ') %}\n\n\nwith validation_errors as (\n\n select\n {{ columns_csv }}\n from {{ model }}\n group by {{ columns_csv }}\n having count(*) > 1\n\n)\n\nselect *\nfrom validation_errors\n\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4891157, + "supported_languages": null + }, + "macro.dbt_utils.test_not_empty_string": { + "name": "test_not_empty_string", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/not_empty_string.sql", + "original_file_path": 
"macros/generic_tests/not_empty_string.sql", + "unique_id": "macro.dbt_utils.test_not_empty_string", + "macro_sql": "{% test not_empty_string(model, column_name, trim_whitespace=true) %}\n\n {{ return(adapter.dispatch('test_not_empty_string', 'dbt_utils')(model, column_name, trim_whitespace)) }}\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_not_empty_string"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4898841, + "supported_languages": null + }, + "macro.dbt_utils.default__test_not_empty_string": { + "name": "default__test_not_empty_string", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/not_empty_string.sql", + "original_file_path": "macros/generic_tests/not_empty_string.sql", + "unique_id": "macro.dbt_utils.default__test_not_empty_string", + "macro_sql": "{% macro default__test_not_empty_string(model, column_name, trim_whitespace=true) %}\n\n with\n \n all_values as (\n\n select \n\n\n {% if trim_whitespace == true -%}\n\n trim({{ column_name }}) as {{ column_name }}\n\n {%- else -%}\n\n {{ column_name }}\n\n {%- endif %}\n \n from {{ model }}\n\n ),\n\n errors as (\n\n select * from all_values\n where {{ column_name }} = ''\n\n )\n\n select * from errors\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4903939, + "supported_languages": null + }, + "macro.dbt_utils.test_fewer_rows_than": { + "name": "test_fewer_rows_than", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/fewer_rows_than.sql", + "original_file_path": "macros/generic_tests/fewer_rows_than.sql", + "unique_id": "macro.dbt_utils.test_fewer_rows_than", + "macro_sql": "{% test fewer_rows_than(model, compare_model, group_by_columns = []) %}\n {{ return(adapter.dispatch('test_fewer_rows_than', 'dbt_utils')(model, compare_model, group_by_columns)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_fewer_rows_than"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4917734, + "supported_languages": null + }, + "macro.dbt_utils.default__test_fewer_rows_than": { + "name": "default__test_fewer_rows_than", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/fewer_rows_than.sql", + "original_file_path": "macros/generic_tests/fewer_rows_than.sql", + "unique_id": "macro.dbt_utils.default__test_fewer_rows_than", + "macro_sql": "{% macro default__test_fewer_rows_than(model, compare_model, group_by_columns) %}\n\n{{ config(fail_calc = 'sum(coalesce(row_count_delta, 0))') }}\n\n{% if group_by_columns|length() > 0 %}\n {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %}\n {% set join_gb_cols %}\n {% for c in group_by_columns %}\n and a.{{c}} = b.{{c}}\n {% endfor %}\n {% endset %}\n {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}\n{% endif %}\n\n{#-- We must add a fake join key in case additional grouping variables are not provided --#}\n{#-- Redshift does not allow for dynamically created join conditions (e.g. full join on 1 = 1 --#}\n{#-- The same logic is used in equal_rowcount. 
In case of changes, maintain consistent logic --#}\n{% set group_by_columns = ['id_dbtutils_test_fewer_rows_than'] + group_by_columns %}\n{% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}\n\n\nwith a as (\n\n select \n {{select_gb_cols}}\n 1 as id_dbtutils_test_fewer_rows_than,\n count(*) as count_our_model \n from {{ model }}\n {{ groupby_gb_cols }}\n\n),\nb as (\n\n select \n {{select_gb_cols}}\n 1 as id_dbtutils_test_fewer_rows_than,\n count(*) as count_comparison_model \n from {{ compare_model }}\n {{ groupby_gb_cols }}\n\n),\ncounts as (\n\n select\n\n {% for c in group_by_columns -%}\n a.{{c}} as {{c}}_a,\n b.{{c}} as {{c}}_b,\n {% endfor %}\n\n count_our_model,\n count_comparison_model\n from a\n full join b on \n a.id_dbtutils_test_fewer_rows_than = b.id_dbtutils_test_fewer_rows_than\n {{ join_gb_cols }}\n\n),\nfinal as (\n\n select *,\n case\n -- fail the test if we have more rows than the reference model and return the row count delta\n when count_our_model > count_comparison_model then (count_our_model - count_comparison_model)\n -- fail the test if they are the same number\n when count_our_model = count_comparison_model then 1\n -- pass the test if the delta is positive (i.e. return the number 0)\n else 0\n end as row_count_delta\n from counts\n\n)\n\nselect * from final\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.49367, + "supported_languages": null + }, + "macro.dbt_utils.test_equal_rowcount": { + "name": "test_equal_rowcount", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/equal_rowcount.sql", + "original_file_path": "macros/generic_tests/equal_rowcount.sql", + "unique_id": "macro.dbt_utils.test_equal_rowcount", + "macro_sql": "{% test equal_rowcount(model, compare_model, group_by_columns = []) %}\n {{ return(adapter.dispatch('test_equal_rowcount', 'dbt_utils')(model, compare_model, group_by_columns)) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_utils.default__test_equal_rowcount"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4949286, + "supported_languages": null + }, + "macro.dbt_utils.default__test_equal_rowcount": { + "name": "default__test_equal_rowcount", + "resource_type": "macro", + "package_name": "dbt_utils", + "path": "macros/generic_tests/equal_rowcount.sql", + "original_file_path": "macros/generic_tests/equal_rowcount.sql", + "unique_id": "macro.dbt_utils.default__test_equal_rowcount", + "macro_sql": "{% macro default__test_equal_rowcount(model, compare_model, group_by_columns) %}\n\n{#-- Needs to be set at parse time, before we return '' below --#}\n{{ config(fail_calc = 'sum(coalesce(diff_count, 0))') }}\n\n{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. 
#}\n{%- if not execute -%}\n {{ return('') }}\n{% endif %}\n\n{% if group_by_columns|length() > 0 %}\n {% set select_gb_cols = group_by_columns|join(', ') + ', ' %}\n {% set join_gb_cols %}\n {% for c in group_by_columns %}\n and a.{{c}} = b.{{c}}\n {% endfor %}\n {% endset %}\n {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}\n{% endif %}\n\n{#-- We must add a fake join key in case additional grouping variables are not provided --#}\n{#-- Redshift does not allow for dynamically created join conditions (e.g. full join on 1 = 1 --#}\n{#-- The same logic is used in fewer_rows_than. In case of changes, maintain consistent logic --#}\n{% set group_by_columns = ['id_dbtutils_test_equal_rowcount'] + group_by_columns %}\n{% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}\n\nwith a as (\n\n select \n {{select_gb_cols}}\n 1 as id_dbtutils_test_equal_rowcount,\n count(*) as count_a \n from {{ model }}\n {{groupby_gb_cols}}\n\n\n),\nb as (\n\n select \n {{select_gb_cols}}\n 1 as id_dbtutils_test_equal_rowcount,\n count(*) as count_b \n from {{ compare_model }}\n {{groupby_gb_cols}}\n\n),\nfinal as (\n\n select\n \n {% for c in group_by_columns -%}\n a.{{c}} as {{c}}_a,\n b.{{c}} as {{c}}_b,\n {% endfor %}\n\n count_a,\n count_b,\n abs(count_a - count_b) as diff_count\n\n from a\n full join b\n on\n a.id_dbtutils_test_equal_rowcount = b.id_dbtutils_test_equal_rowcount\n {{join_gb_cols}}\n\n\n)\n\nselect * from final\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4968016, + "supported_languages": null + }, + "macro.nesso_macros.hash": { + "name": "hash", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/hash_column.sql", + "original_file_path": "macros/hash_column.sql", + "unique_id": "macro.nesso_macros.hash", + "macro_sql": "{%- macro hash(field) -%} {{ return(adapter.dispatch(\"hash\", \"dbt\")(field)) }} {%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.my_nesso_project.default__hash"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4975836, + "supported_languages": null + }, + "macro.nesso_macros.default__hash": { + "name": "default__hash", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/hash_column.sql", + "original_file_path": "macros/hash_column.sql", + "unique_id": "macro.nesso_macros.default__hash", + "macro_sql": "{%- macro default__hash(field) -%}\n md5(cast({{ adapter.quote(field) }} as {{ api.Column.translate_type(\"string\") }}))\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4979799, + "supported_languages": null + }, + "macro.nesso_macros.databricks__hash": { + "name": "databricks__hash", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/hash_column.sql", + "original_file_path": "macros/hash_column.sql", + "unique_id": "macro.nesso_macros.databricks__hash", + "macro_sql": "{%- macro databricks__hash(field) -%}\n sha2(cast({{ adapter.quote(field) }} as {{ api.Column.translate_type(\"string\") }}), 256)\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + 
"docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4983213, + "supported_languages": null + }, + "macro.nesso_macros.sqlserver__hash": { + "name": "sqlserver__hash", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/hash_column.sql", + "original_file_path": "macros/hash_column.sql", + "unique_id": "macro.nesso_macros.sqlserver__hash", + "macro_sql": "{%- macro sqlserver__hash(field) -%}\n HASHBYTES(\n 'SHA2_256', cast({{ adapter.quote(field) }} as {{ api.Column.translate_type(\"string\") }})\n )\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.4986622, + "supported_languages": null + }, + "macro.nesso_macros.create_description_markdown": { + "name": "create_description_markdown", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/create_description_markdown.sql", + "original_file_path": "macros/create_description_markdown.sql", + "unique_id": "macro.nesso_macros.create_description_markdown", + "macro_sql": "{% macro create_description_markdown(relation_name=none, docs_name=none, schema=none) %}\n\n {% if docs_name is none %} {% set docs_name = schema + \"_\" + relation_name %} {% endif %}\n\n {% if execute %}\n {{ print(\"{% docs \" + docs_name + \" %}\") }}\n {{ print(\"## `\" + relation_name + \"` table\") }}\n\n {{ print(\"\") }}\n\n {{ print(\"### \ud83d\udcdd Details\") }}\n {{ print(\"-\") }}\n\n {{ print(\"\") }}\n\n {{ print(\"### \ud83d\udcda External docs\") }}\n {{ print(\"-\") }}\n {{ print(\"{% enddocs %}\") }}\n {%- endif -%}\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.500304, + "supported_languages": null + }, + "macro.nesso_macros.print_profile_docs": { + "name": "print_profile_docs", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/dbt_profiler.sql", + "original_file_path": "macros/dbt_profiler.sql", + "unique_id": "macro.nesso_macros.print_profile_docs", + "macro_sql": "{% macro print_profile_docs(\n relation=none,\n relation_name=none,\n docs_name=none,\n schema=none,\n database=none,\n exclude_measures=[],\n include_columns=[],\n exclude_columns=[],\n max_rows=none,\n max_columns=13,\n max_column_width=30,\n max_precision=none\n) %}\n {%- set results = get_profile_table(\n relation=relation,\n relation_name=relation_name,\n schema=schema,\n database=database,\n exclude_measures=exclude_measures,\n include_columns=include_columns,\n exclude_columns=exclude_columns,\n ) -%}\n\n {% if docs_name is none %} {% set docs_name = schema + \"_\" + relation_name %} {% endif %}\n\n {% if execute %}\n {{ print(\"{% docs \" + docs_name + \" %}\") }}\n {{ print(\"## `\" + relation_name + \"` table\") }}\n\n {{ print(\"\") }}\n\n {{ print(\"### \ud83d\udcdd Details\") }}\n {{ print(\"-\") }}\n\n {{ print(\"\") }}\n\n {{ print(\"### \ud83d\udcca Profiling\") }}\n {% do results.print_table(\n max_rows=max_rows,\n max_columns=max_columns,\n max_column_width=max_column_width,\n max_precision=max_precision,\n ) %}\n\n {{ print(\"\") }}\n\n {{ print(\"### \ud83d\udcda External docs\") }}\n {{ print(\"-\") }}\n {{ print(\"{% enddocs %}\") }}\n {%- endif -%}\n{%- endmacro -%}\n\n\n", + "depends_on": { + 
"macros": ["macro.nesso_macros.get_profile_table"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5297084, + "supported_languages": null + }, + "macro.nesso_macros.get_profile_table": { + "name": "get_profile_table", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/dbt_profiler.sql", + "original_file_path": "macros/dbt_profiler.sql", + "unique_id": "macro.nesso_macros.get_profile_table", + "macro_sql": "{% macro get_profile_table(\n relation=none,\n relation_name=none,\n schema=none,\n database=none,\n exclude_measures=[],\n include_columns=[],\n exclude_columns=[]\n) %}\n\n {%- set relation = dbt_profiler.get_relation(\n relation=relation, relation_name=relation_name, schema=schema, database=database\n ) -%}\n {%- set profile_sql = get_profile(\n relation=relation,\n exclude_measures=exclude_measures,\n include_columns=include_columns,\n exclude_columns=exclude_columns,\n ) -%}\n {{ log(profile_sql, info=False) }}\n {% set results = run_query(profile_sql) %}\n {% set results = results.rename(results.column_names | map(\"lower\")) %}\n {% do return(results) %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.get_relation", + "macro.nesso_macros.get_profile", + "macro.dbt.run_query" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5312316, + "supported_languages": null + }, + "macro.nesso_macros.databricks__type_string": { + "name": "databricks__type_string", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/dbt_profiler.sql", + "original_file_path": "macros/dbt_profiler.sql", + "unique_id": "macro.nesso_macros.databricks__type_string", + "macro_sql": "\n\n\n{%- macro databricks__type_string() -%} string {%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5314257, + "supported_languages": null + }, + "macro.nesso_macros.get_profile": { + "name": "get_profile", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/dbt_profiler.sql", + "original_file_path": "macros/dbt_profiler.sql", + "unique_id": "macro.nesso_macros.get_profile", + "macro_sql": "{% macro get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[]) %}\n {{\n return(\n adapter.dispatch(\"get_profile\", macro_namespace=\"dbt_profiler\")(\n relation, exclude_measures, include_columns, exclude_columns\n )\n )\n }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.my_nesso_project.default__get_profile"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5319006, + "supported_languages": null + }, + "macro.nesso_macros.default__get_profile": { + "name": "default__get_profile", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/dbt_profiler.sql", + "original_file_path": "macros/dbt_profiler.sql", + "unique_id": "macro.nesso_macros.default__get_profile", + "macro_sql": "{% macro default__get_profile(\n relation, exclude_measures=[], include_columns=[], exclude_columns=[]\n) %}\n\n {%- if include_columns and exclude_columns -%}\n {{\n 
exceptions.raise_compiler_error(\n \"Both include_columns and exclude_columns arguments were provided to the `get_profile` macro. Only one is allowed.\"\n )\n }}\n {%- endif -%}\n\n {%- set all_measures = [\n \"row_count\",\n \"not_null_proportion\",\n \"distinct_proportion\",\n \"distinct_count\",\n \"is_unique\",\n \"min\",\n \"max\",\n \"avg\",\n \"std_dev_population\",\n \"std_dev_sample\",\n ] -%}\n\n {%- set include_measures = all_measures | reject(\"in\", exclude_measures) -%}\n\n {{ log(\"Include measures: \" ~ include_measures, info=False) }}\n\n {% if execute %}\n {% do dbt_profiler.assert_relation_exists(relation) %}\n\n {{ log(\"Get columns in relation %s\" | format(relation.include()), info=False) }}\n {%- set relation_columns = adapter.get_columns_in_relation(relation) -%}\n {%- set relation_column_names = relation_columns | map(attribute=\"name\") | list -%}\n {{ log(\"Relation columns: \" ~ relation_column_names | join(\", \"), info=False) }}\n\n {%- if include_columns -%}\n {%- set profile_column_names = (\n relation_column_names | select(\"in\", include_columns) | list\n ) -%}\n {%- elif exclude_columns -%}\n {%- set profile_column_names = (\n relation_column_names | reject(\"in\", exclude_columns) | list\n ) -%}\n {%- else -%} {%- set profile_column_names = relation_column_names -%}\n {%- endif -%}\n\n {{ log(\"Profile columns: \" ~ profile_column_names | join(\", \"), info=False) }}\n\n {% set information_schema_columns = run_query(\n dbt_profiler.select_from_information_schema_columns(relation)\n ) %}\n {% set information_schema_columns = information_schema_columns.rename(\n information_schema_columns.column_names | map(\"lower\")\n ) %}\n {% set information_schema_data_types = (\n information_schema_columns.columns[\"data_type\"].values() | map(\"lower\") | list\n ) %}\n {% set information_schema_column_names = (\n information_schema_columns.columns[\"column_name\"].values()\n | map(\"lower\")\n | list\n ) %}\n {% set data_type_map = {} %}\n {% for column_name in information_schema_column_names %}\n {% do data_type_map.update(\n {column_name: information_schema_data_types[loop.index - 1]}\n ) %}\n {% endfor %}\n {{ log(\"Column data types: \" ~ data_type_map, info=False) }}\n\n {% set profile_sql %}\n with source_data as (\n select\n *\n from {{ relation }}\n ),\n\n column_profiles as (\n {% for column_name in profile_column_names %}\n {% set data_type = data_type_map.get(column_name.lower(), \"\") %}\n select \n lower('{{ column_name }}') as column_name,\n nullif(lower('{{ data_type }}'), '') as data_type,\n {% if \"row_count\" not in exclude_measures -%}\n cast(count(*) as numeric) as row_count,\n {%- endif %}\n {% if \"not_null_proportion\" not in exclude_measures -%}\n sum(case when {{ adapter.quote(column_name) }} is null then 0 else 1 end) / cast(count(*) as numeric) as not_null_proportion,\n {%- endif %}\n {% if \"distinct_proportion\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) / cast(count(*) as numeric) as distinct_proportion,\n {%- endif %}\n {% if \"distinct_count\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) as distinct_count,\n {%- endif %}\n {% if \"is_unique\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) = count(*) as is_unique,\n {%- endif %}\n {% if \"min\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) or dbt_profiler.is_date_or_time_dtype(data_type) %}cast(min({{ adapter.quote(column_name) }}) as {{ 
dbt_profiler.type_string() }}){% else %}null{% endif %} as min,\n {%- endif %}\n {% if \"max\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) or dbt_profiler.is_date_or_time_dtype(data_type) %}cast(max({{ adapter.quote(column_name) }}) as {{ dbt_profiler.type_string() }}){% else %}null{% endif %} as max,\n {%- endif %}\n {% if \"avg\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}avg({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as avg,\n {%- endif %}\n {% if \"std_dev_population\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}stddev_pop({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as std_dev_population,\n {%- endif %}\n {% if \"std_dev_sample\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}stddev_samp({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as std_dev_sample,\n {%- endif %}\n cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at,\n {{ loop.index }} as _column_position\n from source_data\n\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n\n select\n column_name,\n data_type,\n {% for measure in include_measures %}\n {{ measure }},\n {% endfor %}\n profiled_at\n from column_profiles\n order by _column_position asc\n {% endset %}\n\n {% do return(profile_sql) %}\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.assert_relation_exists", + "macro.dbt.run_query", + "macro.dbt_profiler.select_from_information_schema_columns", + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_date_or_time_dtype", + "macro.dbt_profiler.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5395598, + "supported_languages": null + }, + "macro.nesso_macros.databricks__get_profile": { + "name": "databricks__get_profile", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/dbt_profiler.sql", + "original_file_path": "macros/dbt_profiler.sql", + "unique_id": "macro.nesso_macros.databricks__get_profile", + "macro_sql": "{% macro databricks__get_profile(\n relation, exclude_measures=[], include_columns=[], exclude_columns=[]\n) %}\n\n {%- if include_columns and exclude_columns -%}\n {{\n exceptions.raise_compiler_error(\n \"Both include_columns and exclude_columns arguments were provided to the `get_profile` macro. 
Only one is allowed.\"\n )\n }}\n {%- endif -%}\n\n {%- set all_measures = [\n \"row_count\",\n \"not_null_proportion\",\n \"distinct_proportion\",\n \"distinct_count\",\n \"is_unique\",\n \"min\",\n \"max\",\n \"avg\",\n \"std_dev_population\",\n \"std_dev_sample\",\n ] -%}\n\n {%- set include_measures = all_measures | reject(\"in\", exclude_measures) -%}\n\n {{ log(\"Include measures: \" ~ include_measures, info=False) }}\n\n {% if execute %}\n {% do dbt_profiler.assert_relation_exists(relation) %}\n\n {{ log(\"Get columns in relation %s\" | format(relation.include()), info=False) }}\n {%- set relation_columns = adapter.get_columns_in_relation(relation) -%}\n {%- set relation_column_names = relation_columns | map(attribute=\"name\") | list -%}\n {{ log(\"Relation columns: \" ~ relation_column_names | join(\", \"), info=False) }}\n\n {%- if include_columns -%}\n {%- set profile_column_names = (\n relation_column_names | select(\"in\", include_columns) | list\n ) -%}\n {%- elif exclude_columns -%}\n {%- set profile_column_names = (\n relation_column_names | reject(\"in\", exclude_columns) | list\n ) -%}\n {%- else -%} {%- set profile_column_names = relation_column_names -%}\n {%- endif -%}\n\n {{ log(\"Profile columns: \" ~ profile_column_names | join(\", \"), info=False) }}\n\n {# Get column metadata. #}\n {% call statement(\"table_metadata\", fetch_result=True) -%}\n describe table extended {{ relation.schema }}.{{ relation.identifier }}\n {% endcall %}\n {% set columns_metadata = load_result('table_metadata').table %}\n {% set columns_metadata = columns_metadata.rename(columns_metadata.column_names | map('lower')) %}\n\n {% set data_types = columns_metadata.columns['data_type'].values() | map('lower') | list %}\n {% set column_names = columns_metadata.columns['col_name'].values() | map('lower') | list %}\n {% set data_type_map = {} %}\n {% for column_name in column_names %}\n {% do data_type_map.update({column_name: data_types[loop.index-1]}) %}\n {% endfor %}\n {{ log(\"Column data types: \" ~ data_type_map, info=False) }}\n\n {% set profile_sql %}\n with source_data as (\n select\n *\n from {{ relation }}\n ),\n\n column_profiles as (\n {% for column_name in profile_column_names %}\n {% set data_type = data_type_map.get(column_name.lower(), \"\") %}\n select \n lower('{{ column_name }}') as column_name,\n nullif(lower('{{ data_type }}'), '') as data_type,\n {% if \"row_count\" not in exclude_measures -%}\n cast(count(*) as numeric) as row_count,\n {%- endif %}\n {% if \"not_null_proportion\" not in exclude_measures -%}\n sum(case when {{ adapter.quote(column_name) }} is null then 0 else 1 end) / cast(count(*) as numeric) as not_null_proportion,\n {%- endif %}\n {% if \"distinct_proportion\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) / cast(count(*) as numeric) as distinct_proportion,\n {%- endif %}\n {% if \"distinct_count\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) as distinct_count,\n {%- endif %}\n {% if \"is_unique\" not in exclude_measures -%}\n count(distinct {{ adapter.quote(column_name) }}) = count(*) as is_unique,\n {%- endif %}\n {% if \"min\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) or dbt_profiler.is_date_or_time_dtype(data_type) %}cast(min({{ adapter.quote(column_name) }}) as {{ dbt_profiler.type_string() }}){% else %}null{% endif %} as min,\n {%- endif %}\n {% if \"max\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) or 
dbt_profiler.is_date_or_time_dtype(data_type) %}cast(max({{ adapter.quote(column_name) }}) as {{ dbt_profiler.type_string() }}){% else %}null{% endif %} as max,\n {%- endif %}\n {% if \"avg\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}avg({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as avg,\n {%- endif %}\n {% if \"std_dev_population\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}stddev_pop({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as std_dev_population,\n {%- endif %}\n {% if \"std_dev_sample\" not in exclude_measures -%}\n {% if dbt_profiler.is_numeric_dtype(data_type) %}stddev_samp({{ adapter.quote(column_name) }}){% else %}cast(null as numeric){% endif %} as std_dev_sample,\n {%- endif %}\n cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at,\n {{ loop.index }} as _column_position\n from source_data\n\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n\n select\n column_name,\n data_type,\n {% for measure in include_measures %}\n {{ measure }},\n {% endfor %}\n profiled_at\n from column_profiles\n order by _column_position asc\n {% endset %}\n\n {# {{ print(profile_sql) }} #}\n {% do return(profile_sql) %}\n{% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_profiler.assert_relation_exists", + "macro.dbt.statement", + "macro.dbt_profiler.is_numeric_dtype", + "macro.dbt_profiler.is_date_or_time_dtype", + "macro.dbt_profiler.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5474527, + "supported_languages": null + }, + "macro.nesso_macros.generate_column_yaml": { + "name": "generate_column_yaml", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/generate_model_yaml_boilerplate.sql", + "original_file_path": "macros/generate_model_yaml_boilerplate.sql", + "unique_id": "macro.nesso_macros.generate_column_yaml", + "macro_sql": "{% macro generate_column_yaml(\n column,\n model_yaml,\n columns_metadata_dict,\n parent_column_name=\"\",\n include_pii_tag=True,\n include_data_types=True,\n snakecase_columns=True\n) %}\n {{ log(\"Generating YAML for column '\" ~ column.name ~ \"'...\") }}\n {% if parent_column_name %} {% set column_name = parent_column_name ~ \".\" ~ column.name %}\n {% else %} {% set column_name = column.name %}\n {% endif %}\n\n {% set column_metadata_dict = columns_metadata_dict.get(column.name, {}) %}\n {% if include_pii_tag %} {% set tags = column_metadata_dict.get(\"tags\", []) %}\n {% else %}\n {% set tags = column_metadata_dict.get(\"tags\", []) | reject(\"equalto\", \"PII\") | list %}\n {% endif %}\n\n {% if snakecase_columns %}\n {% do model_yaml.append(\" - name: \" ~ adapter.quote(snake_case(column.name))) %}\n {% else %} {% do model_yaml.append(\" - name: \" ~ adapter.quote(column.name)) %}\n {% endif %}\n {% do model_yaml.append(\" quote: true\") %}\n {% if include_data_types %}\n {% do model_yaml.append(\n \" data_type: \" ~ (column.data_type | upper)\n ) %}\n {% endif %}\n {% do model_yaml.append(\n ' description: \"' ~ column_metadata_dict.get(\"description\", \"\") ~ '\"'\n ) %}\n {% do model_yaml.append(\" # tests:\") %}\n {% do model_yaml.append(\" # - unique\") %}\n {% do model_yaml.append(\" # - not_null\") %}\n {% do model_yaml.append(\" tags: \" ~ tags) %}\n {% do model_yaml.append(\"\") %}\n\n {% if 
column.fields | length > 0 %}\n {% for child_column in column.fields %}\n {% set model_yaml = generate_column_yaml(\n child_column,\n model_yaml,\n column_metadata_dict,\n parent_column_name=column_name,\n ) %}\n {% endfor %}\n {% endif %}\n {% do return(model_yaml) %}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.nesso_macros.snake_case"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5560517, + "supported_languages": null + }, + "macro.nesso_macros.generate_model_yaml": { + "name": "generate_model_yaml", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/generate_model_yaml_boilerplate.sql", + "original_file_path": "macros/generate_model_yaml_boilerplate.sql", + "unique_id": "macro.nesso_macros.generate_model_yaml", + "macro_sql": "{% macro generate_model_yaml(\n model_name,\n technical_owner=\"None\",\n business_owner=\"None\",\n domains=[],\n source_systems=[],\n tags=[],\n upstream_metadata=True,\n include_sla=True,\n include_pii_tag=False,\n include_data_types=True,\n snakecase_columns=True,\n base_model_prefix=none,\n bootstrapped_base_model=False\n) %}\n {# \nGenerate model YAML template.\n\nArgs:\n model_name (str): The name of the model for which to generate the template.\n technical_owner (str, optional): The technical owner of the model.\n business_owner (str, optional): The business owner of the model.\n domains (List[str]): The domains the model belongs to.\n source_systems (List[str]): Sources from which the table originates, e.g., SQL Server, BigQuery, etc.\n tags (List[str]): The tags to attach to the model.\n upstream_metadata (bool, optional): Whether to inherit upstream model metadata.\n include_sla (bool, optional): Whether to include the SLA meta key.\n include_pii_tag (bool, optional): Whether to include the PII tag.\n include_data_types (bool, optional): Whether to include the data types of column.\n This may be useful when PII columns are already masked in the base model.\n snakecase_columns (bool, optional): Whether to standardize upstream column names\n to snakecase in the model.\n base_model_prefix (str, optional): Prefix to apply to the name of the base model.\n Defaults to empty string (no prefix).\n bootstrapped_base_model (bool, optional): Determines whether the base model was built using \n the `base_model bootstrap` command.\n#} \n \n {# Set to True to enable debugging. 
#}\n {% set info=False %}\n\n {{\n log(\n \"generate_model_yaml | Generating model YAML for model '\"\n ~ model_name\n ~ \"'...\",\n info=info\n )\n }}\n\n {% if upstream_metadata %}\n {% set upstream_model_metadata = get_parent_source_or_model_metadata(model_name) %}\n {{\n log(\n \"generate_model_yaml | Got upstream model metadata:\\n\\n\"\n ~ upstream_model_metadata\n ~ \"\\n\",\n info=info\n )\n }}\n {# {% set metadata_resolved = resolve_upstream_metadata(upstream_models_metadata) %}\n {{\n log(\n \"generate_model_yaml() | Resolved upstream metadata: \\n\\n\"\n ~ metadata_resolved\n ~ \"\\n\",\n info=info\n )\n }} #}\n {% else %}\n {# {% set metadata_resolved = {} %} #}\n {% set upstream_model_metadata = {} %}\n {% endif %}\n \n \n {% set dependencies = get_model_dependencies(model_name) %}\n {% set upstream_model_type = dependencies[\"type\"] %}\n\n {% if base_model_prefix is none %}\n {% set base_model_prefix = \"\" %}\n {% else %}\n {% if base_model_prefix and not base_model_prefix.endswith(\"_\") %}\n {% set base_model_prefix = base_model_prefix ~ \"_\" %}\n {% endif %}\n {% set model_name = base_model_prefix ~ model_name %}\n {% endif %}\n\n {{ log(\"generate_model_yaml | Base model prefix: \" ~ base_model_prefix, info=info) }}\n\n {# Table metadata. #}\n {% set model_yaml = [] %}\n {% do model_yaml.append(\"version: 2\") %}\n {% do model_yaml.append(\"\") %}\n {% do model_yaml.append(\"models:\") %}\n\n {% do model_yaml.append(\" - name: \" ~ model_name | lower) %}\n\n {% if upstream_model_type == \"source\" %}\n {% do model_yaml.append(\" description: Base model of the `\" ~ model_name | replace(base_model_prefix, \"\") ~ \"` table.\") %}\n {% else %} {% do model_yaml.append(' description: \"\"') %}\n {% endif %}\n\n {# {% set tags = metadata_resolved.get(\"tags\", tags) %}\n\n {% if tags %}\n {% do model_yaml.append(' config:')%}\n {% do model_yaml.append(' tags: ' ~ tags)%}\n {% endif %} #}\n\n {{ log(\"generate_model_yaml | Adding meta key...\", info=info) }}\n\n {% do model_yaml.append(\" meta:\") %}\n {% if upstream_model_metadata %}\n {% set meta = upstream_model_metadata.get(\"meta\", {}) %}\n {# {% set meta = metadata_resolved.get(\"meta\", {}) %} #}\n {% else %} {% set meta = {} %}\n {% endif %}\n\n {# Extract owners from metadata. #}\n {# Jinja forgets variables defined in loops -- but it has a concept of namespace as a workaround. 
#}\n {% set ns = namespace(technical_owner=technical_owner, business_owner=business_owner) %} \n\n {{ log(\"generate_model_yaml | Getting owner metadata...\", info=info) }}\n\n {% if (technical_owner == \"None\" or business_owner == \"None\") and meta %}\n\n {% for owner_meta in meta.get(\"owners\") %}\n {% set typ = owner_meta.get(\"type\") %}\n {% set email = owner_meta.get(\"email\") %}\n\n {% if typ == \"Technical owner\" %}\n {# {{ print(\"Setting technical owner to \" ~ email)}} #}\n {% if not technical_owner or technical_owner == \"None\" %}\n {% set ns.technical_owner = email %}\n {% endif %}\n {% elif typ == \"Business owner\" %}\n {# {{ print(\"Setting business owner to \" ~ email)}} #}\n {% if not business_owner or business_owner == \"None\" %}\n {% set ns.business_owner = email %}\n {% endif %}\n {% endif %}\n\n {% endfor %}\n {% endif %}\n\n {% do model_yaml.append(\" owners:\") %}\n {% do model_yaml.append(\" - type: Technical owner\") %}\n {% do model_yaml.append(\" email: \" ~ ns.technical_owner) %}\n {% do model_yaml.append(\" - type: Business owner\") %}\n {% do model_yaml.append(\" email: \" ~ ns.business_owner) %}\n {% do model_yaml.append(\" domains: \" ~ meta.get(\"domains\", domains)) %}\n {% do model_yaml.append(\" true_source: \" ~ meta.get(\"true_source\", source_systems)) %}\n\n {% if include_sla %}\n {% do model_yaml.append(\" SLA: \" ~ meta.get(\"SLA\", \"24 hours\")) %}\n {% endif %}\n\n {{ log(\"generate_model_yaml | Meta key added.\", info=info) }}\n\n {% do model_yaml.append(\" columns:\") %}\n\n {# Separates base models created using bootstrap command\n because they can multiple parent sources and models. #}\n {% if upstream_model_type == \"source\" and not bootstrapped_base_model %}\n {% set schema = dependencies[\"node\"].split(\".\")[-2] %}\n {% set relation = source(schema, model_name | replace(base_model_prefix, \"\")) %}\n {% else %} {% set relation = ref(model_name) %}\n {% endif %}\n\n {{ log(\"generate_model_yaml| Retrieving the list of columns...\", info=info) }}\n\n {%- set columns = adapter.get_columns_in_relation(relation) -%}\n\n {# Column metadata. 
#}\n {% if meta %}\n {{ log(\"generate_model_yaml | Retrieving column metadata...\", info=info) }}\n {% set columns_metadata_dict = (\n get_parent_source_or_model_column_metadata(\n model_name | replace(base_model_prefix, \"\")\n )\n if upstream_metadata\n else {}\n ) %}\n {{\n log(\n \"generate_model_yaml | Successfully retrieved column metadata:\\n\"\n ~ columns_metadata_dict,\n info=info\n )\n }}\n {% else %} {% set columns_metadata_dict = {} %}\n {% endif %}\n\n {{ log(\"generate_model_yaml | Generating column YAML...\", info=info) }}\n {% for column in columns %}\n {{ \n log(\n \"generate_model_yaml() | Generating YAML for column: \"\n ~ column,\n info=info\n )\n }}\n {% set model_yaml = generate_column_yaml(\n column,\n model_yaml,\n columns_metadata_dict,\n include_data_types=include_data_types,\n include_pii_tag=False,\n snakecase_columns=True,\n ) %}\n {{ log(\"generate_model_yaml() | Generated YAML: \" ~ model_yaml, info=info) }}\n {% endfor %}\n {{ log(\"generate_model_yaml | Successfully generated column YAML.\", info=info) }}\n \n {%- if execute -%}\n\n {%- set joined = model_yaml | join(\"\\n\") -%}\n\n {{ print(joined) }}\n {{ log(\"generate_model_yaml() | Final metadata:\\n\\n\" ~ joined, info=info) }}\n\n {%- do return(joined) -%}\n\n {%- endif -%}\n\n{%- endmacro -%}\n\n\n", + "depends_on": { + "macros": [ + "macro.nesso_macros.get_parent_source_or_model_metadata", + "macro.nesso_macros.get_model_dependencies", + "macro.nesso_macros.get_parent_source_or_model_column_metadata", + "macro.nesso_macros.generate_column_yaml" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.56548, + "supported_languages": null + }, + "macro.nesso_macros.resolve_upstream_metadata": { + "name": "resolve_upstream_metadata", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/generate_model_yaml_boilerplate.sql", + "original_file_path": "macros/generate_model_yaml_boilerplate.sql", + "unique_id": "macro.nesso_macros.resolve_upstream_metadata", + "macro_sql": "{% macro resolve_upstream_metadata(metadata) %}\n+ {# Set to True to enable logging to console #}\n+ {% set info = False %}\n+ {# \n+ Merge upstream metadata using the following logic:\n+ - fields of type string are taken from the first model in the list\n+ - fields of type list are merged together\n+ - for dict fields, same rules are applied to their subfields\n+ #}\n+\n+ {{ log(\"resolve_upstream_metadata() | Got metadata:\\n\\n\" ~ metadata ~ \"\\n\", info=info) }}\n+\n+ {% set metadata_resolved = {} %}\n+ {% for model_name in metadata %}\n+ {{ log(\"resolve_upstream_metadata() | Processing model '\" ~ model_name ~ \"'...\", info=info) }}\n+ {% set model_metadata = metadata[model_name] %}\n+\n+ {{ log(\"resolve_upstream_metadata() | Got model metadata: \\n\\n\" ~ model_metadata ~ \"\\n\", info=info) }}\n+\n+ {% for field in model_metadata %}\n+ {# Workaround because dbt jinja doesn't have the `continue` loop control. #}\n+ {% set continue_tracker = namespace(should_continue = True) %}\n+ {% set field_content = model_metadata[field] %}\n+ {% if field not in metadata_resolved %}\n+ {% do metadata_resolved.update({field: field_content}) %}\n+ {% else %}\n+ {% if field_content is string %}\n+ {# String - keep the value from the first encountered upstream,\n+ as there's no way to decide which is the correct one. 
#}\n+\n+ {{ log(\"resolve_upstream_metadata() | String field found: \" ~ field ~ \": \" ~ field_content, info=info) }}\n+ \n+ {% set continue_tracker.should_continue = False %}\n+ {% elif field_content is mapping and continue_tracker.should_continue %}\n+ {# A dictionary - merge the keys. #}\n+\n+ {{ log(\"resolve_upstream_metadata() | Dict field found: \" ~ field ~ \": \" ~ field_content, info=info) }}\n+\n+ {% for subfield in field_content %}\n+ {% set subfield_content = field_content[subfield] %}\n+ {% set continue_tracker2 = namespace(should_continue = True) %}\n+ {# Each key in the dictionary can also be a string, \n+ list, or dict. We apply the same rules as to top-level fields.#}\n+ {% if subfield_content is string %}\n+ {% set continue_tracker2.should_continue = False %}\n+ {% elif subfield_content is mapping and continue_tracker2.should_continue %}\n+ {% do metadata_resolved[field].update({subfield: subfield_content}) %}\n+ {% elif subfield_content is iterable and continue_tracker2.should_continue %}\n+ {% for key in subfield_content %}\n+ {% if key not in metadata_resolved[field][subfield] %}\n+ {% do metadata_resolved[field][subfield].append(key) %}\n+ {% endif %}\n+ {% endfor %}\n+ {% else %}\n+ {% do metadata_resolved[field].update({subfield: model_metadata[field]}) %} \n+ {% endif %}\n+ {% endfor %}\n+ {% elif field_content is iterable and continue_tracker.should_continue %}\n+ {# A list - append all unique items into the final list. #}\n+ \n+ {{ log(\"resolve_upstream_metadata() | List field found: \" ~ field ~ \": \" ~ field_content, info=info) }}\n+\n+ {% for key in field_content %}\n+ {% if key not in metadata_resolved[field] %}\n+ {% do metadata_resolved[field].append(key) %}\n+ {% endif %}\n+ {% endfor %}\n+ {% else %}\n+ {% do metadata_resolved.update({field: model_metadata[field]}) %} \n+ {% endif %}\n+ {% endif %}\n+ {% endfor %}\n+ {% endfor %}\n+\n+ {{ log(\"resolve_upstream_metadata() | Resolved metadata:\\n\\n\" ~ metadata_resolved ~ \"\\n\", info=info) }}\n+\n+ {% do return(metadata_resolved) %}\n+\n+{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.570872, + "supported_languages": null + }, + "macro.nesso_macros.get_tables_in_schema": { + "name": "get_tables_in_schema", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/generate_source_yaml_boilerplate.sql", + "original_file_path": "macros/generate_source_yaml_boilerplate.sql", + "unique_id": "macro.nesso_macros.get_tables_in_schema", + "macro_sql": "{% macro get_tables_in_schema(\n schema_name,\n database_name=target.database,\n table_pattern=\"%\",\n exclude=\"\",\n print_result=False\n) %}\n\n {% set tables = dbt_utils.get_relations_by_pattern(\n schema_pattern=schema_name,\n database=database_name,\n table_pattern=table_pattern,\n exclude=exclude,\n ) %}\n\n {% set table_list = tables | map(attribute=\"identifier\") %}\n\n {% if print_result %} {{ print(table_list | join(\",\")) }} {% endif %}\n\n {{ return(table_list | sort) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_utils.get_relations_by_pattern"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5754898, + "supported_languages": null + }, + "macro.nesso_macros.generate_source": { + "name": "generate_source", + "resource_type": 
"macro", + "package_name": "nesso_macros", + "path": "macros/generate_source_yaml_boilerplate.sql", + "original_file_path": "macros/generate_source_yaml_boilerplate.sql", + "unique_id": "macro.nesso_macros.generate_source", + "macro_sql": "{% macro generate_source(\n schema_name,\n technical_owner=none,\n business_owner=none,\n domains=[],\n source_systems=[],\n database_name=target.database,\n generate_columns=True,\n include_descriptions=True,\n include_data_types=True,\n include_table_profiling=True,\n include_sla=True,\n include_freshness=True,\n loaded_at_field=\"_viadot_downloaded_at_utc::timestamp\",\n freshness={\n \"warn_after\": \"{ count: 24, period: hour }\",\n \"error_after\": \"{ count: 48, period: hour }\",\n },\n table_pattern=\"%\",\n exclude=\"\",\n name=schema_name,\n table_names=None,\n case_sensitive_cols=True\n) %}\n {# The default table_pattern is adapted to the postgres database. Make sure it also matches the database you intend to use #}\n ,\n\n {% set sources_yaml = [] %}\n\n {% if table_names is none %}\n {% do sources_yaml.append(\"version: 2\") %}\n {% do sources_yaml.append(\"\") %}\n {% do sources_yaml.append(\"sources:\") %}\n {% do sources_yaml.append(\" - name: \" ~ name | lower) %}\n\n {% if database_name != target.database %}\n {% do sources_yaml.append(\" database: \" ~ database_name | lower) %}\n {% endif %}\n\n {% do sources_yaml.append(\" schema: \" ~ schema_name | lower) %}\n {% if include_descriptions %}\n {% do sources_yaml.append(' description: \"\"') %}\n {% endif %}\n {% do sources_yaml.append(\"\\n tables:\") %}\n\n {% set tables = get_tables_in_schema(schema_name, database_name, table_pattern, exclude) %}\n {% else %} {% set tables = table_names %}\n\n {% endif %}\n\n {% if table_names %} {% do sources_yaml.append(\"\") %} {% endif %}\n\n {% for table in tables %}\n {% do sources_yaml.append(\"\\n - name: \" ~ table | lower) %}\n {% if include_descriptions %}\n \n {% if include_table_profiling %}\n {# Note that the doc must already exist. You can generate it beforehand with dbt-profiler. 
#}\n {% do sources_yaml.append(' description: ' ~ \"'\" ~ '{{ doc(\"' ~ schema_name ~ \"_\" ~ table ~ '\") }}' ~ \"'\") %}\n {% else %}\n {% do sources_yaml.append(' description: \"\"') %}\n {% endif %}\n\n {% endif %}\n\n {% if include_freshness %}\n {% do sources_yaml.append(\" loaded_at_field: \" ~ loaded_at_field) %}\n {% do sources_yaml.append(\" freshness:\") %}\n {% do sources_yaml.append(\" warn_after: \" ~ freshness.get(\"warn_after\", \"\")) %}\n {% do sources_yaml.append(\n \" error_after: \" ~ freshness.get(\"error_after\", \"\")\n ) %}\n {% endif %}\n\n {% do sources_yaml.append(\" tags: []\") %}\n\n {% do sources_yaml.append(\" meta:\") %}\n {% do sources_yaml.append(\" owners:\") %}\n {% do sources_yaml.append(\" - type: Technical owner\") %}\n {% do sources_yaml.append(\" email: \" ~ technical_owner) %}\n {% do sources_yaml.append(\" - type: Business owner\") %}\n {% do sources_yaml.append(\" email: \" ~ business_owner) %}\n {% do sources_yaml.append(\" domains: \" ~ domains) %}\n {% do sources_yaml.append(\" true_source: \" ~ source_systems) %}\n\n {% if include_sla %} {% do sources_yaml.append(' SLA: \"24 hours\"') %} {% endif %}\n\n {% if generate_columns %}\n {% do sources_yaml.append(\" columns:\") %}\n\n {% set table_relation = api.Relation.create(\n database=database_name, schema=schema_name, identifier=table\n ) %}\n\n {% set columns = adapter.get_columns_in_relation(table_relation) %}\n {% for column in columns %}\n {% if case_sensitive_cols %}\n {% do sources_yaml.append(\" - name: \" ~ adapter.quote(column.name)) %}\n {% else %}\n {% do sources_yaml.append(\n \" - name: \" ~ adapter.quote(column.name) | lower\n ) %}\n {% endif %}\n {% do sources_yaml.append(\" quote: true\") %}\n {% if include_data_types %}\n {% do sources_yaml.append(\n \" data_type: \" ~ (column.data_type | upper)\n ) %}\n {% endif %}\n {% if include_descriptions %}\n {% do sources_yaml.append(' description: \"\"') %}\n {% endif %}\n {% do sources_yaml.append(\" # tests:\") %}\n {% do sources_yaml.append(\" # - unique\") %}\n {% do sources_yaml.append(\" # - not_null\") %}\n {% do sources_yaml.append(\" tags: []\") %}\n {% endfor %}\n {% endif %}\n\n {% endfor %}\n\n {% if execute %}\n\n {% set joined = sources_yaml | join(\"\\n\") %} {{ print(joined) }} {% do return(joined) %}\n\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.nesso_macros.get_tables_in_schema"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5824294, + "supported_languages": null + }, + "macro.nesso_macros.generate_schema_name": { + "name": "generate_schema_name", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/get_custom_schema.sql", + "original_file_path": "macros/get_custom_schema.sql", + "unique_id": "macro.nesso_macros.generate_schema_name", + "macro_sql": "{% macro generate_schema_name(custom_schema_name, node) -%}\n {{ generate_schema_name_for_env(custom_schema_name, node) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.generate_schema_name_for_env"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.582798, + "supported_languages": null + }, + "macro.nesso_macros.get_table_columns": { + "name": "get_table_columns", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/get_table_columns.sql", + 
"original_file_path": "macros/get_table_columns.sql", + "unique_id": "macro.nesso_macros.get_table_columns", + "macro_sql": "{%- macro get_table_columns(schema_name, table_name, database_name=target.database) -%}\n\n {% set table_relation = api.Relation.create(\n schema=schema_name, identifier=table_name, database=database_name\n ) %}\n\n {% set columns = adapter.get_columns_in_relation(table_relation) %}\n\n\n {% set columns_dict = {} %}\n {% for column in columns %}\n {% set column_name = column.name %}\n {% set data_type = column.data_type | upper %}\n {% do columns_dict.update({column_name: data_type})%}\n {% endfor %}\n\n {% if execute %}\n\n {{ print(columns_dict) }} {% do return(columns_dict) %}\n\n {% endif %}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.584217, + "supported_languages": null + }, + "macro.nesso_macros.get_source_pii_columns": { + "name": "get_source_pii_columns", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/get_source_pii_columns.sql", + "original_file_path": "macros/get_source_pii_columns.sql", + "unique_id": "macro.nesso_macros.get_source_pii_columns", + "macro_sql": "{% macro get_source_pii_columns(dbt_project, schema, table) %}\n\n {% if execute %}\n\n {% set meta_columns = [] %}\n {% set fqname = \"source\" ~ \".\" ~ dbt_project ~ \".\" ~ schema ~ \".\" ~ table %}\n {% set columns = graph.sources[fqname][\"columns\"] %}\n\n {% for column in columns %}\n {% if \"PII\" in graph.sources[fqname][\"columns\"][column][\"tags\"] %}\n {% do meta_columns.append(column) %}\n {% endif %}\n {% endfor %}\n\n {{ return(meta_columns) }}\n\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.585522, + "supported_languages": null + }, + "macro.nesso_macros.generate_base_model": { + "name": "generate_base_model", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/generate_base_model.sql", + "original_file_path": "macros/generate_base_model.sql", + "unique_id": "macro.nesso_macros.generate_base_model", + "macro_sql": "{% macro generate_base_model(\n source_name, table_name, dbt_project, snakecase_columns=False, leading_commas=False\n) %}\n\n {%- set source_relation = source(source_name, table_name) -%}\n\n {%- set columns = adapter.get_columns_in_relation(source_relation) -%}\n {%- set column_names = columns | map(attribute=\"name\") -%}\n\n {%- set base_model_sql -%}\nwith _masked as (\n select {{ '\\n ' ~ hash_source_pii_columns(table=table_name, schema=source_name, dbt_project=dbt_project ) | trim }}\n from {{ \"{{ source(\" ~ '\"' ~ source_name ~ '\"' ~ \", \" ~ '\"' ~ table_name ~ '\"' ~ \") }}\" }}\n),\n\nrenamed as (\n select\n {%- if leading_commas -%}\n {%- for column in column_names %}\n {{\", \" if not loop.first}}\n {% if snakecase_columns %}\n {{ adapter.quote(column) ~ ' as ' ~ adapter.quote(snake_case(column)) }}\n {% else %}\n {{ adapter.quote(column) }}\n {% endif %}\n {%- endfor %}\n {% else %}\n {% for column in column_names %}\n {%- if snakecase_columns -%}\n {{ adapter.quote(column) ~ ' as ' ~ adapter.quote(snake_case(column)) }}\n {%- else -%}\n {{ adapter.quote(column) }}\n {%- endif -%}\n {{\",\" if not loop.last}}\n {% endfor %}\n {%- endif %}\n from 
_masked\n)\n\nselect * from renamed\n {%- endset -%}\n\n {% if execute %} {{ print(base_model_sql) }} {% do return(base_model_sql) %} {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.nesso_macros.hash_source_pii_columns", + "macro.nesso_macros.snake_case" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.588609, + "supported_languages": null + }, + "macro.nesso_macros.hash_source_pii_columns": { + "name": "hash_source_pii_columns", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/hash_source_pii_columns.sql", + "original_file_path": "macros/hash_source_pii_columns.sql", + "unique_id": "macro.nesso_macros.hash_source_pii_columns", + "macro_sql": "{%- macro hash_source_pii_columns(dbt_project, schema, table=None) -%}\n\n {%- set pii_columns = get_source_pii_columns(\n dbt_project=dbt_project, schema=schema, table=table\n ) -%}\n\n {% for column in pii_columns %}\n {{ hash(column) | indent(4) }} as {{ adapter.quote(column) }},\n {{ \"\\n\" if not loop.last else \"\\n \" }}\n {%- endfor -%}\n {{ dbt_utils.star(from=source(schema, table), except=pii_columns) | indent(4) | trim }}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.nesso_macros.get_source_pii_columns", + "macro.nesso_macros.hash", + "macro.dbt_utils.star" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5896792, + "supported_languages": null + }, + "macro.nesso_macros.generate_seed_schema_yaml": { + "name": "generate_seed_schema_yaml", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/generate_seed_yaml_boilerplate.sql", + "original_file_path": "macros/generate_seed_yaml_boilerplate.sql", + "unique_id": "macro.nesso_macros.generate_seed_schema_yaml", + "macro_sql": "{% macro generate_seed_schema_yaml() %}\n\n {% set yaml = [] %}\n {% do yaml.append(\"version: 2\") %}\n {% do yaml.append(\"\") %}\n {% do yaml.append(\"seeds: []\") %}\n\n {% if execute %}\n {% set joined = yaml | join(\"\\n\") %} {{ print(joined) }} {% do return(joined) %}\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.592552, + "supported_languages": null + }, + "macro.nesso_macros.generate_seed_yaml": { + "name": "generate_seed_yaml", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/generate_seed_yaml_boilerplate.sql", + "original_file_path": "macros/generate_seed_yaml_boilerplate.sql", + "unique_id": "macro.nesso_macros.generate_seed_yaml", + "macro_sql": "{% macro generate_seed_yaml(\n seed,\n database_name=target.database,\n schema_name=target.schema,\n generate_columns=True,\n include_tags=False,\n include_owners=True,\n technical_owner=\"\",\n business_owner=\"\",\n domains=[],\n source_systems=[],\n case_sensitive_cols=True\n) %}\n\n {% set yaml = [] %}\n\n {% do yaml.append(\" - name: \" ~ seed | lower) %}\n {% do yaml.append(' description: \"\"') %}\n\n {% if include_tags %} {% do yaml.append(\" tags: []\") %} {% endif %}\n\n {% if include_owners %}\n {% do yaml.append(\" meta:\") %}\n {% do yaml.append(\" owners:\") %}\n {% do yaml.append(\" - type: Technical owner\") %}\n {% do yaml.append(\" email: \" ~ technical_owner) 
%}\n {% do yaml.append(\" - type: Business owner\") %}\n {% do yaml.append(\" email: \" ~ business_owner) %}\n {% do yaml.append(\" domains: \" ~ domains) %}\n {% do yaml.append(\" true_source: \" ~ source_systems) %}\n {% endif %}\n\n {% if generate_columns %}\n {% do yaml.append(\" columns:\") %}\n\n {% set table_relation = api.Relation.create(\n database=database_name, schema=schema_name, identifier=seed\n ) %}\n {% set columns = adapter.get_columns_in_relation(table_relation) %}\n {% for column in columns %}\n {% if case_sensitive_cols %}\n {% do yaml.append(\" - name: \" ~ column.name) %}\n {% do yaml.append(\" quote: true\") %}\n {% else %} {% do yaml.append(\" - name: \" ~ column.name | lower) %}\n {% endif %}\n {% do yaml.append(' description: \"\"') %}\n {% do yaml.append(\" # tests:\") %}\n {% do yaml.append(\" # - unique\") %}\n {% do yaml.append(\" # - not_null\") %}\n {% do yaml.append(\" # - accepted_values:\") %}\n {% do yaml.append(' # values: [\"value1\", \"value2\"]') %}\n {% endfor %}\n\n {% endif %}\n\n {% if execute %}\n {% set joined = yaml | join(\"\\n\") %} {{ print(joined) }} {% do return(joined) %}\n {% endif %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.596341, + "supported_languages": null + }, + "macro.nesso_macros.redshift__list_relations_without_caching": { + "name": "redshift__list_relations_without_caching", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/redshift_external_tables_fix.sql", + "original_file_path": "macros/redshift_external_tables_fix.sql", + "unique_id": "macro.nesso_macros.redshift__list_relations_without_caching", + "macro_sql": "{% macro redshift__list_relations_without_caching(schema_relation) %}\n\n {% call statement('list_relations_without_caching', fetch_result=True) -%}\n select\n table_catalog as database,\n table_name as name,\n table_schema as schema,\n 'table' as type\n from information_schema.tables\n where table_schema ilike '{{ schema_relation.schema }}'\n and table_type = 'BASE TABLE'\n union all\n select\n table_catalog as database,\n table_name as name,\n table_schema as schema,\n case\n when view_definition ilike '%create materialized view%'\n then 'materialized_view'\n else 'view'\n end as type\n from information_schema.views\n where table_schema ilike '{{ schema_relation.schema }}'\n union all\n select \n redshift_database_name as database,\n tablename as name,\n schemaname as schema,\n 'table' as type\n from svv_external_tables\n where schemaname ilike '{{ schema_relation.schema }}'\n {% endcall %}\n\n {{ return(load_result('list_relations_without_caching').table) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.5973108, + "supported_languages": null + }, + "macro.nesso_macros.snake_case": { + "name": "snake_case", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/codegen_helpers.sql", + "original_file_path": "macros/codegen_helpers.sql", + "unique_id": "macro.nesso_macros.snake_case", + "macro_sql": "{%- macro snake_case(s) -%} {{ s | replace(\" \", \"_\") | replace(\"-\", \"_\") | lower }} {%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + 
"show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6006238, + "supported_languages": null + }, + "macro.nesso_macros.get_model_dependencies": { + "name": "get_model_dependencies", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/codegen_helpers.sql", + "original_file_path": "macros/codegen_helpers.sql", + "unique_id": "macro.nesso_macros.get_model_dependencies", + "macro_sql": "{% macro get_model_dependencies(model_name) %}\n {# Set to True to enable logging #}\n {% set info=False %}\n\n {{ \n log(\n \"get_model_dependencies | Getting upstream dependencies for model '\"\n ~ model_name\n ~ \"'...\",\n info=info\n )\n }}\n\n {% set upstream_fqns = [] %}\n\n {{ log(\"get_model_dependencies | Checking upstream models...\", info=info) }}\n {% for node in graph.nodes.values() | selectattr(\"name\", \"equalto\", model_name) %}\n {% if node.depends_on.nodes and not \"source.\" in node.depends_on.nodes[0] %}\n {# The node depends on another model. #}\n {{ \n log(\n \"get_model_dependencies | Got the following dependencies: \"\n ~ node.depends_on.nodes\n ~ \".\",\n info=info\n )\n }}\n {{ return({\"type\": \"model\", \"nodes\": node.depends_on.nodes}) }}\n {% endif %}\n {% endfor %}\n\n {{ log(\"get_model_dependencies | Checking upstream source...\", info=info) }}\n {% for node in graph.sources.values() | selectattr(\"name\", \"equalto\", model_name) %}\n {{ \n log(\n \"get_model_dependencies | Got the following dependencies: \" ~ node, info=info\n )\n }}\n {{ return({\"type\": \"source\", \"node\": node.unique_id}) }}\n {% endfor %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6027026, + "supported_languages": null + }, + "macro.nesso_macros.get_source_or_model_column_metadata": { + "name": "get_source_or_model_column_metadata", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/codegen_helpers.sql", + "original_file_path": "macros/codegen_helpers.sql", + "unique_id": "macro.nesso_macros.get_source_or_model_column_metadata", + "macro_sql": "{% macro get_source_or_model_column_metadata(model_name, model_type=\"model\") %}\n {# \nGet column metadata (description and tags) for a model or source.\n\nReturns: Dict[str, Dict[str, Any]]\n\nExample:\n>>> dbt run-operation get_source_or_model_column_metadata --args '{\"model_name\": \"c4c_contact\", \"model_type\": \"model\"}'\n>>> {\"id\": {\"description\": \"A\", \"tags\": []}}\n#}\n {% if model_type == \"model\" %} {% set nodes = graph.nodes.values() %}\n {% else %} {% set nodes = graph.sources.values() %}\n {% endif %}\n\n {% set columns_metadata_dict = {} %}\n {% for node in nodes | selectattr(\"name\", \"equalto\", model_name) %}\n {% for col_name, col_values in node.columns.items() %}\n {% do columns_metadata_dict.update(\n {\n col_name: {\n \"description\": col_values.description,\n \"tags\": col_values.tags,\n }\n }\n ) %}\n {% endfor %}\n {% endfor %}\n\n {{ return(columns_metadata_dict) }}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6041276, + "supported_languages": null + }, + "macro.nesso_macros.get_parent_source_or_model_column_metadata": { + "name": 
"get_parent_source_or_model_column_metadata", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/codegen_helpers.sql", + "original_file_path": "macros/codegen_helpers.sql", + "unique_id": "macro.nesso_macros.get_parent_source_or_model_column_metadata", + "macro_sql": "{% macro get_parent_source_or_model_column_metadata(model_name) %}\n {# \nGet column metadata (description and tags) for the model's or source's\nparent source or model.\n\nThis is useful for automatically populating YAML files of downstream models\nwith the information already provided in upstream (for example, if a view\nuses a field from a source amd this field's description is already available \nin the source's YAML file).\n\nNote that if the same column name exists in multiple upstream models, \nthe description will be overwritten at each loop and the final one\nwill be taken from the model that happens to be the last in the loop. \n\nReturns: Dict[str, Dict[str, Any]]\n\nExample:\n>>> dbt run-operation get_parent_source_or_model_column_metadata --args '{\"model_name\": \"c4c_contact\"}'\n>>> {\"id\": {\"description\": \"B\", \"tags\": []}}\n#}\n {# Set to True to enable logging to console #}\n {% set info = False %}\n\n {{\n log(\n \"get_parent_source_or_model_column_metadata | Getting column-level metadata for \"\n ~ model_type\n ~ \" '\"\n ~ model_name\n ~ \"'...\",\n info=info\n )\n }}\n\n {% if execute %}\n {% set dependencies = get_model_dependencies(model_name) %}\n {% set model_type = dependencies[\"type\"] %}\n\n {# Note we immediately return `column_metadata`, as outside the if/else, it's magically set to None. #}\n {% if model_type == \"model\" %}\n {% for full_model in dependencies[\"nodes\"] %}\n {% set upstream_model_name = full_model.split(\".\")[-1] %}\n {% set column_metadata = get_source_or_model_column_metadata(\n model_name=upstream_model_name, model_type=model_type\n ) %}\n {{\n log(\n \"get_parent_source_or_model_column_metadata() | Got model column metadata:\\n\\n\"\n ~ column_metadata\n ~ \"\\n\",\n info=info\n )\n }}\n {{ return(column_metadata) }}\n {% endfor %}\n {% endif %}\n\n {% if model_type == \"source\" %}\n {% set upstream_model_name = dependencies[\"node\"].split(\".\")[-1] %}\n {% set column_metadata = get_source_or_model_column_metadata(\n model_name=upstream_model_name, model_type=model_type\n ) %}\n {{\n log(\n \"get_parent_source_or_model_column_metadata() | Got source column metadata:\\n\\n\"\n ~ column_metadata\n ~ \"\\n\",\n info=info\n )\n }}\n {{ return(column_metadata) }}\n {% endif %}\n\n {% endif %}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.nesso_macros.get_model_dependencies", + "macro.nesso_macros.get_source_or_model_column_metadata" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6063347, + "supported_languages": null + }, + "macro.nesso_macros.get_source_or_model_metadata": { + "name": "get_source_or_model_metadata", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/codegen_helpers.sql", + "original_file_path": "macros/codegen_helpers.sql", + "unique_id": "macro.nesso_macros.get_source_or_model_metadata", + "macro_sql": "{% macro get_source_or_model_metadata(model_name, model_type=\"model\") %}\n {# \nGet table metadata (description, tags, and meta) for a model or source.\n\nNote that if there are multiple upstream models, the metadata will\nbe overwritten at each loop and the 
final one will be taken from the model \nthat happens to be the last in the loop. \n\nReturns: Dict[str, Union[str, List[str], Dict[str, Any]]]\n\nExample:\n>>> dbt run-operation get_source_or_model_metadata --args '{\"model_name\": \"c4c_contact\", \"model_type\": \"model\"}'\n>>> {\"description\": \"A\", \"tags\": [], \"meta\": {\"owner\": js@example.com}}\n#}\n {# Set to True to enable debugging #}\n {% set info = False %}\n\n {{ \n log(\n \"get_source_or_model_metadata() | Getting model-level metadata for \" \n ~ model_type \n ~ \" '\" \n ~ model_name \n ~ \"'...\",\n info=info\n )\n }}\n\n {% if model_type == \"model\" %} {% set nodes = graph.nodes.values() %}\n {% else %} {% set nodes = graph.sources.values() %}\n {% endif %}\n\n {% set table_metadata_dict = {} %}\n {% for node in nodes | selectattr(\"name\", \"equalto\", model_name) %}\n {{ log(node, info=info) }}\n {% do table_metadata_dict.update(\n {\"description\": node.description, \"tags\": node.tags, \"meta\": node.meta}\n ) %}\n {% endfor %}\n\n {{\n log(\n \"get_source_or_model_metadata() | Successfully retrieved model-level metadata for \"\n ~ model_type\n ~ \" '\"\n ~ model_name\n ~ \"':\\n\"\n ~ table_metadata_dict,\n info=info\n )\n }}\n\n {{ return(table_metadata_dict) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6083736, + "supported_languages": null + }, + "macro.nesso_macros.get_parent_source_or_model_metadata": { + "name": "get_parent_source_or_model_metadata", + "resource_type": "macro", + "package_name": "nesso_macros", + "path": "macros/codegen_helpers.sql", + "original_file_path": "macros/codegen_helpers.sql", + "unique_id": "macro.nesso_macros.get_parent_source_or_model_metadata", + "macro_sql": "{% macro get_parent_source_or_model_metadata(model_name) %}\n{#\nGet table metadata (description, tags, and meta) for the model's parent\nsource(s) and/or model(s).\n\nThis is useful for automatically populating YAML files of downstream models\nwith the information already provided in upstream (eg. when defining\nbase views).\n\nReturns: Dict[str, Union[str, List[str], Dict[str, Any]]]\n\nExample:\n>>> dbt run-operation get_parent_source_or_model_metadata --args '{\"model_name\": \"c4c_contact\"}'\n>>> {\"description\": \"B\", \"tags\": [], \"meta\": {\"owner\": js@example.com}}\n#}\n {% if execute %}\n\n {# Set to True to enable debugging. #}\n {% set info=False %}\n\n {{ log(\"get_parent_source_or_model_metadata | Getting upstream metadata...\", info=info) }}\n\n {% set dependencies = get_model_dependencies(model_name) %}\n {{\n log(\n \"get_parent_source_or_model_metadata() | Got the following dependencies: \"\n ~ dependencies,\n info=info\n )\n }}\n {% set model_type = dependencies[\"type\"] %}\n \n {# Note we immediately return `model_metadata`, as outside the if/else, it's magically set to None. 
#}\n {% if model_type == \"model\" %}\n {% for full_model in dependencies[\"nodes\"] %}\n {% set model_name = full_model.split(\".\")[-1] %}\n {% set model_metadata = get_source_or_model_metadata(\n model_name, model_type=model_type\n ) %}\n {% do return(model_metadata) %}\n {% endfor %}\n {% elif model_type == \"source\" %}\n {% set model_name = dependencies[\"node\"].split(\".\")[-1] %}\n {% set model_metadata = get_source_or_model_metadata(\n model_name, model_type=model_type\n ) %}\n {{\n log(\n \"get_parent_source_or_model_metadata| Got the following upstream sources:\\n\"\n ~ model_metadata,\n info=info\n )\n }}\n {% do return(model_metadata) %}\n {% else %} \n {{\n log(\n \"get_parent_source_or_model_metadata| Incorrect model type (\"\n ~ model_type\n ~ \").\",\n info=info\n )\n }}\n {% set model_metadata = {} %}\n {% do return(model_metadata) %}\n {% endif %}\n\n {{ log(\"get_parent_source_or_model_metadata | Finishing...\", info=info) }}\n {{ log(\"\", info=info) }}\n\n {% endif %}\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.nesso_macros.get_model_dependencies", + "macro.nesso_macros.get_source_or_model_metadata" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6108928, + "supported_languages": null + }, + "macro.dbt_date.get_base_dates": { + "name": "get_base_dates", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/get_base_dates.sql", + "original_file_path": "macros/get_base_dates.sql", + "unique_id": "macro.dbt_date.get_base_dates", + "macro_sql": "{% macro get_base_dates(start_date=None, end_date=None, n_dateparts=None, datepart=\"day\") %}\n {{ adapter.dispatch('get_base_dates', 'dbt_date') (start_date, end_date, n_dateparts, datepart) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__get_base_dates"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6128147, + "supported_languages": null + }, + "macro.dbt_date.default__get_base_dates": { + "name": "default__get_base_dates", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/get_base_dates.sql", + "original_file_path": "macros/get_base_dates.sql", + "unique_id": "macro.dbt_date.default__get_base_dates", + "macro_sql": "{% macro default__get_base_dates(start_date, end_date, n_dateparts, datepart) %}\n\n{%- if start_date and end_date -%}\n{%- set start_date=\"cast('\" ~ start_date ~ \"' as \" ~ dbt.type_timestamp() ~ \")\" -%}\n{%- set end_date=\"cast('\" ~ end_date ~ \"' as \" ~ dbt.type_timestamp() ~ \")\" -%}\n\n{%- elif n_dateparts and datepart -%}\n\n{%- set start_date = dbt.dateadd(datepart, -1 * n_dateparts, dbt_date.today()) -%}\n{%- set end_date = dbt_date.tomorrow() -%}\n{%- endif -%}\n\nwith date_spine as\n(\n\n {{ dbt_date.date_spine(\n datepart=datepart,\n start_date=start_date,\n end_date=end_date,\n )\n }}\n\n)\nselect\n cast(d.date_{{ datepart }} as {{ dbt.type_timestamp() }}) as date_{{ datepart }}\nfrom\n date_spine d\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.type_timestamp", + "macro.dbt.dateadd", + "macro.dbt_date.today", + "macro.dbt_date.tomorrow", + "macro.dbt_date.date_spine" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6140993, + 
"supported_languages": null + }, + "macro.dbt_date.bigquery__get_base_dates": { + "name": "bigquery__get_base_dates", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/get_base_dates.sql", + "original_file_path": "macros/get_base_dates.sql", + "unique_id": "macro.dbt_date.bigquery__get_base_dates", + "macro_sql": "{% macro bigquery__get_base_dates(start_date, end_date, n_dateparts, datepart) %}\n\n{%- if start_date and end_date -%}\n{%- set start_date=\"cast('\" ~ start_date ~ \"' as datetime )\" -%}\n{%- set end_date=\"cast('\" ~ end_date ~ \"' as datetime )\" -%}\n\n{%- elif n_dateparts and datepart -%}\n\n{%- set start_date = dbt.dateadd(datepart, -1 * n_dateparts, dbt_date.today()) -%}\n{%- set end_date = dbt_date.tomorrow() -%}\n{%- endif -%}\n\nwith date_spine as\n(\n\n {{ dbt_date.date_spine(\n datepart=datepart,\n start_date=start_date,\n end_date=end_date,\n )\n }}\n\n)\nselect\n cast(d.date_{{ datepart }} as {{ dbt.type_timestamp() }}) as date_{{ datepart }}\nfrom\n date_spine d\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.dateadd", + "macro.dbt_date.today", + "macro.dbt_date.tomorrow", + "macro.dbt_date.date_spine", + "macro.dbt.type_timestamp" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.61532, + "supported_languages": null + }, + "macro.dbt_date.trino__get_base_dates": { + "name": "trino__get_base_dates", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/get_base_dates.sql", + "original_file_path": "macros/get_base_dates.sql", + "unique_id": "macro.dbt_date.trino__get_base_dates", + "macro_sql": "{% macro trino__get_base_dates(start_date, end_date, n_dateparts, datepart) %}\n\n{%- if start_date and end_date -%}\n{%- set start_date=\"cast('\" ~ start_date ~ \"' as \" ~ dbt.type_timestamp() ~ \")\" -%}\n{%- set end_date=\"cast('\" ~ end_date ~ \"' as \" ~ dbt.type_timestamp() ~ \")\" -%}\n\n{%- elif n_dateparts and datepart -%}\n\n{%- set start_date = dbt.dateadd(datepart, -1 * n_dateparts, dbt_date.now()) -%}\n{%- set end_date = dbt_date.tomorrow() -%}\n{%- endif -%}\n\nwith date_spine as\n(\n\n {{ dbt_date.date_spine(\n datepart=datepart,\n start_date=start_date,\n end_date=end_date,\n )\n }}\n\n)\nselect\n cast(d.date_{{ datepart }} as {{ dbt.type_timestamp() }}) as date_{{ datepart }}\nfrom\n date_spine d\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt.type_timestamp", + "macro.dbt.dateadd", + "macro.dbt_date.now", + "macro.dbt_date.tomorrow", + "macro.dbt_date.date_spine" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6165478, + "supported_languages": null + }, + "macro.dbt_date.get_date_dimension": { + "name": "get_date_dimension", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/get_date_dimension.sql", + "original_file_path": "macros/get_date_dimension.sql", + "unique_id": "macro.dbt_date.get_date_dimension", + "macro_sql": "{% macro get_date_dimension(start_date, end_date) %}\n {{ adapter.dispatch('get_date_dimension', 'dbt_date') (start_date, end_date) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__get_date_dimension"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6254494, + 
"supported_languages": null + }, + "macro.dbt_date.default__get_date_dimension": { + "name": "default__get_date_dimension", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/get_date_dimension.sql", + "original_file_path": "macros/get_date_dimension.sql", + "unique_id": "macro.dbt_date.default__get_date_dimension", + "macro_sql": "{% macro default__get_date_dimension(start_date, end_date) %}\nwith base_dates as (\n {{ dbt_date.get_base_dates(start_date, end_date) }}\n),\ndates_with_prior_year_dates as (\n\n select\n cast(d.date_day as date) as date_day,\n cast({{ dbt.dateadd('year', -1 , 'd.date_day') }} as date) as prior_year_date_day,\n cast({{ dbt.dateadd('day', -364 , 'd.date_day') }} as date) as prior_year_over_year_date_day\n from\n \tbase_dates d\n\n)\nselect\n d.date_day,\n {{ dbt_date.yesterday('d.date_day') }} as prior_date_day,\n {{ dbt_date.tomorrow('d.date_day') }} as next_date_day,\n d.prior_year_date_day as prior_year_date_day,\n d.prior_year_over_year_date_day,\n {{ dbt_date.day_of_week('d.date_day', isoweek=false) }} as day_of_week,\n {{ dbt_date.day_of_week('d.date_day', isoweek=true) }} as day_of_week_iso,\n {{ dbt_date.day_name('d.date_day', short=false) }} as day_of_week_name,\n {{ dbt_date.day_name('d.date_day', short=true) }} as day_of_week_name_short,\n {{ dbt_date.day_of_month('d.date_day') }} as day_of_month,\n {{ dbt_date.day_of_year('d.date_day') }} as day_of_year,\n\n {{ dbt_date.week_start('d.date_day') }} as week_start_date,\n {{ dbt_date.week_end('d.date_day') }} as week_end_date,\n {{ dbt_date.week_start('d.prior_year_over_year_date_day') }} as prior_year_week_start_date,\n {{ dbt_date.week_end('d.prior_year_over_year_date_day') }} as prior_year_week_end_date,\n {{ dbt_date.week_of_year('d.date_day') }} as week_of_year,\n\n {{ dbt_date.iso_week_start('d.date_day') }} as iso_week_start_date,\n {{ dbt_date.iso_week_end('d.date_day') }} as iso_week_end_date,\n {{ dbt_date.iso_week_start('d.prior_year_over_year_date_day') }} as prior_year_iso_week_start_date,\n {{ dbt_date.iso_week_end('d.prior_year_over_year_date_day') }} as prior_year_iso_week_end_date,\n {{ dbt_date.iso_week_of_year('d.date_day') }} as iso_week_of_year,\n\n {{ dbt_date.week_of_year('d.prior_year_over_year_date_day') }} as prior_year_week_of_year,\n {{ dbt_date.iso_week_of_year('d.prior_year_over_year_date_day') }} as prior_year_iso_week_of_year,\n\n cast({{ dbt_date.date_part('month', 'd.date_day') }} as {{ dbt.type_int() }}) as month_of_year,\n {{ dbt_date.month_name('d.date_day', short=false) }} as month_name,\n {{ dbt_date.month_name('d.date_day', short=true) }} as month_name_short,\n\n cast({{ dbt.date_trunc('month', 'd.date_day') }} as date) as month_start_date,\n cast({{ last_day('d.date_day', 'month') }} as date) as month_end_date,\n\n cast({{ dbt.date_trunc('month', 'd.prior_year_date_day') }} as date) as prior_year_month_start_date,\n cast({{ last_day('d.prior_year_date_day', 'month') }} as date) as prior_year_month_end_date,\n\n cast({{ dbt_date.date_part('quarter', 'd.date_day') }} as {{ dbt.type_int() }}) as quarter_of_year,\n cast({{ dbt.date_trunc('quarter', 'd.date_day') }} as date) as quarter_start_date,\n cast({{ last_day('d.date_day', 'quarter') }} as date) as quarter_end_date,\n\n cast({{ dbt_date.date_part('year', 'd.date_day') }} as {{ dbt.type_int() }}) as year_number,\n cast({{ dbt.date_trunc('year', 'd.date_day') }} as date) as year_start_date,\n cast({{ last_day('d.date_day', 'year') }} as date) as year_end_date\nfrom\n 
dates_with_prior_year_dates d\norder by 1\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_date.get_base_dates", + "macro.dbt.dateadd", + "macro.dbt_date.yesterday", + "macro.dbt_date.tomorrow", + "macro.dbt_date.day_of_week", + "macro.dbt_date.day_name", + "macro.dbt_date.day_of_month", + "macro.dbt_date.day_of_year", + "macro.dbt_date.week_start", + "macro.dbt_date.week_end", + "macro.dbt_date.week_of_year", + "macro.dbt_date.iso_week_start", + "macro.dbt_date.iso_week_end", + "macro.dbt_date.iso_week_of_year", + "macro.dbt_date.date_part", + "macro.dbt.type_int", + "macro.dbt_date.month_name", + "macro.dbt.date_trunc", + "macro.dbt.last_day" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6291738, + "supported_languages": null + }, + "macro.dbt_date.postgres__get_date_dimension": { + "name": "postgres__get_date_dimension", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/get_date_dimension.sql", + "original_file_path": "macros/get_date_dimension.sql", + "unique_id": "macro.dbt_date.postgres__get_date_dimension", + "macro_sql": "{% macro postgres__get_date_dimension(start_date, end_date) %}\nwith base_dates as (\n {{ dbt_date.get_base_dates(start_date, end_date) }}\n),\ndates_with_prior_year_dates as (\n\n select\n cast(d.date_day as date) as date_day,\n cast({{ dbt.dateadd('year', -1 , 'd.date_day') }} as date) as prior_year_date_day,\n cast({{ dbt.dateadd('day', -364 , 'd.date_day') }} as date) as prior_year_over_year_date_day\n from\n \tbase_dates d\n\n)\nselect\n d.date_day,\n {{ dbt_date.yesterday('d.date_day') }} as prior_date_day,\n {{ dbt_date.tomorrow('d.date_day') }} as next_date_day,\n d.prior_year_date_day as prior_year_date_day,\n d.prior_year_over_year_date_day,\n {{ dbt_date.day_of_week('d.date_day', isoweek=true) }} as day_of_week,\n\n {{ dbt_date.day_name('d.date_day', short=false) }} as day_of_week_name,\n {{ dbt_date.day_name('d.date_day', short=true) }} as day_of_week_name_short,\n {{ dbt_date.day_of_month('d.date_day') }} as day_of_month,\n {{ dbt_date.day_of_year('d.date_day') }} as day_of_year,\n\n {{ dbt_date.week_start('d.date_day') }} as week_start_date,\n {{ dbt_date.week_end('d.date_day') }} as week_end_date,\n {{ dbt_date.week_start('d.prior_year_over_year_date_day') }} as prior_year_week_start_date,\n {{ dbt_date.week_end('d.prior_year_over_year_date_day') }} as prior_year_week_end_date,\n {{ dbt_date.week_of_year('d.date_day') }} as week_of_year,\n\n {{ dbt_date.iso_week_start('d.date_day') }} as iso_week_start_date,\n {{ dbt_date.iso_week_end('d.date_day') }} as iso_week_end_date,\n {{ dbt_date.iso_week_start('d.prior_year_over_year_date_day') }} as prior_year_iso_week_start_date,\n {{ dbt_date.iso_week_end('d.prior_year_over_year_date_day') }} as prior_year_iso_week_end_date,\n {{ dbt_date.iso_week_of_year('d.date_day') }} as iso_week_of_year,\n\n {{ dbt_date.week_of_year('d.prior_year_over_year_date_day') }} as prior_year_week_of_year,\n {{ dbt_date.iso_week_of_year('d.prior_year_over_year_date_day') }} as prior_year_iso_week_of_year,\n\n cast({{ dbt_date.date_part('month', 'd.date_day') }} as {{ dbt.type_int() }}) as month_of_year,\n {{ dbt_date.month_name('d.date_day', short=false) }} as month_name,\n {{ dbt_date.month_name('d.date_day', short=true) }} as month_name_short,\n\n cast({{ dbt.date_trunc('month', 'd.date_day') }} as date) as month_start_date,\n cast({{ last_day('d.date_day', 'month') 
}} as date) as month_end_date,\n\n cast({{ dbt.date_trunc('month', 'd.prior_year_date_day') }} as date) as prior_year_month_start_date,\n cast({{ last_day('d.prior_year_date_day', 'month') }} as date) as prior_year_month_end_date,\n\n cast({{ dbt_date.date_part('quarter', 'd.date_day') }} as {{ dbt.type_int() }}) as quarter_of_year,\n cast({{ dbt.date_trunc('quarter', 'd.date_day') }} as date) as quarter_start_date,\n {# last_day does not support quarter because postgresql does not support quarter interval. #}\n cast({{dbt.dateadd('day', '-1', dbt.dateadd('month', '3', dbt.date_trunc('quarter', 'd.date_day')))}} as date) as quarter_end_date,\n\n cast({{ dbt_date.date_part('year', 'd.date_day') }} as {{ dbt.type_int() }}) as year_number,\n cast({{ dbt.date_trunc('year', 'd.date_day') }} as date) as year_start_date,\n cast({{ last_day('d.date_day', 'year') }} as date) as year_end_date\nfrom\n dates_with_prior_year_dates d\norder by 1\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_date.get_base_dates", + "macro.dbt.dateadd", + "macro.dbt_date.yesterday", + "macro.dbt_date.tomorrow", + "macro.dbt_date.day_of_week", + "macro.dbt_date.day_name", + "macro.dbt_date.day_of_month", + "macro.dbt_date.day_of_year", + "macro.dbt_date.week_start", + "macro.dbt_date.week_end", + "macro.dbt_date.week_of_year", + "macro.dbt_date.iso_week_start", + "macro.dbt_date.iso_week_end", + "macro.dbt_date.iso_week_of_year", + "macro.dbt_date.date_part", + "macro.dbt.type_int", + "macro.dbt_date.month_name", + "macro.dbt.date_trunc", + "macro.dbt.last_day" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.633159, + "supported_languages": null + }, + "macro.dbt_date.iso_week_of_year": { + "name": "iso_week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_of_year.sql", + "original_file_path": "macros/calendar_date/iso_week_of_year.sql", + "unique_id": "macro.dbt_date.iso_week_of_year", + "macro_sql": "{%- macro iso_week_of_year(date=None, tz=None) -%}\n{%-set dt = date if date else dbt_date.today(tz) -%}\n{{ adapter.dispatch('iso_week_of_year', 'dbt_date') (dt) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_date.today", + "macro.dbt_date.duckdb__iso_week_of_year" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6342206, + "supported_languages": null + }, + "macro.dbt_date._iso_week_of_year": { + "name": "_iso_week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_of_year.sql", + "original_file_path": "macros/calendar_date/iso_week_of_year.sql", + "unique_id": "macro.dbt_date._iso_week_of_year", + "macro_sql": "{%- macro _iso_week_of_year(date, week_type) -%}\ncast({{ dbt_date.date_part(week_type, date) }} as {{ dbt.type_int() }})\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part", "macro.dbt.type_int"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6345215, + "supported_languages": null + }, + "macro.dbt_date.default__iso_week_of_year": { + "name": "default__iso_week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_of_year.sql", + 
"original_file_path": "macros/calendar_date/iso_week_of_year.sql", + "unique_id": "macro.dbt_date.default__iso_week_of_year", + "macro_sql": "\n\n{%- macro default__iso_week_of_year(date) -%}\n{{ dbt_date._iso_week_of_year(date, 'isoweek') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_of_year"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6348372, + "supported_languages": null + }, + "macro.dbt_date.snowflake__iso_week_of_year": { + "name": "snowflake__iso_week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_of_year.sql", + "original_file_path": "macros/calendar_date/iso_week_of_year.sql", + "unique_id": "macro.dbt_date.snowflake__iso_week_of_year", + "macro_sql": "\n\n{%- macro snowflake__iso_week_of_year(date) -%}\n{{ dbt_date._iso_week_of_year(date, 'weekiso') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_of_year"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6350913, + "supported_languages": null + }, + "macro.dbt_date.postgres__iso_week_of_year": { + "name": "postgres__iso_week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_of_year.sql", + "original_file_path": "macros/calendar_date/iso_week_of_year.sql", + "unique_id": "macro.dbt_date.postgres__iso_week_of_year", + "macro_sql": "\n\n{%- macro postgres__iso_week_of_year(date) -%}\n-- postgresql week is isoweek, the first week of a year containing January 4 of that year.\n{{ dbt_date._iso_week_of_year(date, 'week') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_of_year"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.635335, + "supported_languages": null + }, + "macro.dbt_date.duckdb__iso_week_of_year": { + "name": "duckdb__iso_week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_of_year.sql", + "original_file_path": "macros/calendar_date/iso_week_of_year.sql", + "unique_id": "macro.dbt_date.duckdb__iso_week_of_year", + "macro_sql": "\n\n{%- macro duckdb__iso_week_of_year(date) -%}\n{{ return(dbt_date.postgres__iso_week_of_year(date)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.postgres__iso_week_of_year"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6355677, + "supported_languages": null + }, + "macro.dbt_date.spark__iso_week_of_year": { + "name": "spark__iso_week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_of_year.sql", + "original_file_path": "macros/calendar_date/iso_week_of_year.sql", + "unique_id": "macro.dbt_date.spark__iso_week_of_year", + "macro_sql": "\n\n{%- macro spark__iso_week_of_year(date) -%}\n{{ dbt_date._iso_week_of_year(date, 'week') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_of_year"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.635787, + 
"supported_languages": null + }, + "macro.dbt_date.trino__iso_week_of_year": { + "name": "trino__iso_week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_of_year.sql", + "original_file_path": "macros/calendar_date/iso_week_of_year.sql", + "unique_id": "macro.dbt_date.trino__iso_week_of_year", + "macro_sql": "\n\n{%- macro trino__iso_week_of_year(date) -%}\n{{ dbt_date._iso_week_of_year(date, 'week') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_of_year"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6360037, + "supported_languages": null + }, + "macro.dbt_date.round_timestamp": { + "name": "round_timestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/round_timestamp.sql", + "original_file_path": "macros/calendar_date/round_timestamp.sql", + "unique_id": "macro.dbt_date.round_timestamp", + "macro_sql": "{% macro round_timestamp(timestamp) %}\n {{ dbt.date_trunc(\"day\", dbt.dateadd(\"hour\", 12, timestamp)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.date_trunc", "macro.dbt.dateadd"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6364408, + "supported_languages": null + }, + "macro.dbt_date.iso_week_end": { + "name": "iso_week_end", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_end.sql", + "original_file_path": "macros/calendar_date/iso_week_end.sql", + "unique_id": "macro.dbt_date.iso_week_end", + "macro_sql": "{%- macro iso_week_end(date=None, tz=None) -%}\n{%-set dt = date if date else dbt_date.today(tz) -%}\n{{ adapter.dispatch('iso_week_end', 'dbt_date') (dt) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_date.today", + "macro.dbt_date.default__iso_week_end" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6372883, + "supported_languages": null + }, + "macro.dbt_date._iso_week_end": { + "name": "_iso_week_end", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_end.sql", + "original_file_path": "macros/calendar_date/iso_week_end.sql", + "unique_id": "macro.dbt_date._iso_week_end", + "macro_sql": "{%- macro _iso_week_end(date, week_type) -%}\n{%- set dt = dbt_date.iso_week_start(date) -%}\n{{ dbt_date.n_days_away(6, dt) }}\n{%- endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_date.iso_week_start", + "macro.dbt_date.n_days_away" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6376622, + "supported_languages": null + }, + "macro.dbt_date.default__iso_week_end": { + "name": "default__iso_week_end", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_end.sql", + "original_file_path": "macros/calendar_date/iso_week_end.sql", + "unique_id": "macro.dbt_date.default__iso_week_end", + "macro_sql": "\n\n{%- macro default__iso_week_end(date) -%}\n{{ dbt_date._iso_week_end(date, 'isoweek') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_end"] + }, + "description": "", + 
"meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6378815, + "supported_languages": null + }, + "macro.dbt_date.snowflake__iso_week_end": { + "name": "snowflake__iso_week_end", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_end.sql", + "original_file_path": "macros/calendar_date/iso_week_end.sql", + "unique_id": "macro.dbt_date.snowflake__iso_week_end", + "macro_sql": "\n\n{%- macro snowflake__iso_week_end(date) -%}\n{{ dbt_date._iso_week_end(date, 'weekiso') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_end"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6380956, + "supported_languages": null + }, + "macro.dbt_date.week_end": { + "name": "week_end", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_end.sql", + "original_file_path": "macros/calendar_date/week_end.sql", + "unique_id": "macro.dbt_date.week_end", + "macro_sql": "{%- macro week_end(date=None, tz=None) -%}\n{%-set dt = date if date else dbt_date.today(tz) -%}\n{{ adapter.dispatch('week_end', 'dbt_date') (dt) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_date.today", "macro.dbt_date.duckdb__week_end"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6388922, + "supported_languages": null + }, + "macro.dbt_date.default__week_end": { + "name": "default__week_end", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_end.sql", + "original_file_path": "macros/calendar_date/week_end.sql", + "unique_id": "macro.dbt_date.default__week_end", + "macro_sql": "{%- macro default__week_end(date) -%}\n{{ last_day(date, 'week') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.last_day"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6391125, + "supported_languages": null + }, + "macro.dbt_date.snowflake__week_end": { + "name": "snowflake__week_end", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_end.sql", + "original_file_path": "macros/calendar_date/week_end.sql", + "unique_id": "macro.dbt_date.snowflake__week_end", + "macro_sql": "\n\n{%- macro snowflake__week_end(date) -%}\n{%- set dt = dbt_date.week_start(date) -%}\n{{ dbt_date.n_days_away(6, dt) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.week_start", "macro.dbt_date.n_days_away"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6394134, + "supported_languages": null + }, + "macro.dbt_date.postgres__week_end": { + "name": "postgres__week_end", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_end.sql", + "original_file_path": "macros/calendar_date/week_end.sql", + "unique_id": "macro.dbt_date.postgres__week_end", + "macro_sql": "\n\n{%- macro postgres__week_end(date) -%}\n{%- set dt = dbt_date.week_start(date) -%}\n{{ dbt_date.n_days_away(6, dt) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.week_start", 
"macro.dbt_date.n_days_away"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6397102, + "supported_languages": null + }, + "macro.dbt_date.duckdb__week_end": { + "name": "duckdb__week_end", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_end.sql", + "original_file_path": "macros/calendar_date/week_end.sql", + "unique_id": "macro.dbt_date.duckdb__week_end", + "macro_sql": "\n\n{%- macro duckdb__week_end(date) -%}\n{{ return(dbt_date.postgres__week_end(date)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.postgres__week_end"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6399982, + "supported_languages": null + }, + "macro.dbt_date.day_of_month": { + "name": "day_of_month", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_month.sql", + "original_file_path": "macros/calendar_date/day_of_month.sql", + "unique_id": "macro.dbt_date.day_of_month", + "macro_sql": "{%- macro day_of_month(date) -%}\n{{ dbt_date.date_part('day', date) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6404483, + "supported_languages": null + }, + "macro.dbt_date.redshift__day_of_month": { + "name": "redshift__day_of_month", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_month.sql", + "original_file_path": "macros/calendar_date/day_of_month.sql", + "unique_id": "macro.dbt_date.redshift__day_of_month", + "macro_sql": "\n\n{%- macro redshift__day_of_month(date) -%}\ncast({{ dbt_date.date_part('day', date) }} as {{ dbt.type_bigint() }})\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part", "macro.dbt.type_bigint"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6408553, + "supported_languages": null + }, + "macro.dbt_date.day_of_year": { + "name": "day_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_year.sql", + "original_file_path": "macros/calendar_date/day_of_year.sql", + "unique_id": "macro.dbt_date.day_of_year", + "macro_sql": "{%- macro day_of_year(date) -%}\n{{ adapter.dispatch('day_of_year', 'dbt_date') (date) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__day_of_year"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.641401, + "supported_languages": null + }, + "macro.dbt_date.default__day_of_year": { + "name": "default__day_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_year.sql", + "original_file_path": "macros/calendar_date/day_of_year.sql", + "unique_id": "macro.dbt_date.default__day_of_year", + "macro_sql": "\n\n{%- macro default__day_of_year(date) -%}\n {{ dbt_date.date_part('dayofyear', date) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": 
true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6416574, + "supported_languages": null + }, + "macro.dbt_date.postgres__day_of_year": { + "name": "postgres__day_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_year.sql", + "original_file_path": "macros/calendar_date/day_of_year.sql", + "unique_id": "macro.dbt_date.postgres__day_of_year", + "macro_sql": "\n\n{%- macro postgres__day_of_year(date) -%}\n {{ dbt_date.date_part('doy', date) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6418839, + "supported_languages": null + }, + "macro.dbt_date.redshift__day_of_year": { + "name": "redshift__day_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_year.sql", + "original_file_path": "macros/calendar_date/day_of_year.sql", + "unique_id": "macro.dbt_date.redshift__day_of_year", + "macro_sql": "\n\n{%- macro redshift__day_of_year(date) -%}\n cast({{ dbt_date.date_part('dayofyear', date) }} as {{ dbt.type_bigint() }})\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part", "macro.dbt.type_bigint"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.642192, + "supported_languages": null + }, + "macro.dbt_date.spark__day_of_year": { + "name": "spark__day_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_year.sql", + "original_file_path": "macros/calendar_date/day_of_year.sql", + "unique_id": "macro.dbt_date.spark__day_of_year", + "macro_sql": "\n\n{%- macro spark__day_of_year(date) -%}\n dayofyear({{ date }})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6423805, + "supported_languages": null + }, + "macro.dbt_date.trino__day_of_year": { + "name": "trino__day_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_year.sql", + "original_file_path": "macros/calendar_date/day_of_year.sql", + "unique_id": "macro.dbt_date.trino__day_of_year", + "macro_sql": "\n\n{%- macro trino__day_of_year(date) -%}\n {{ dbt_date.date_part('day_of_year', date) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6425927, + "supported_languages": null + }, + "macro.dbt_date.next_week": { + "name": "next_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/next_week.sql", + "original_file_path": "macros/calendar_date/next_week.sql", + "unique_id": "macro.dbt_date.next_week", + "macro_sql": "{%- macro next_week(tz=None) -%}\n{{ dbt_date.n_weeks_away(1, tz) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.n_weeks_away"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6428711, + "supported_languages": null + 
}, + "macro.dbt_date.month_name": { + "name": "month_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/month_name.sql", + "original_file_path": "macros/calendar_date/month_name.sql", + "unique_id": "macro.dbt_date.month_name", + "macro_sql": "{%- macro month_name(date, short=True) -%}\n {{ adapter.dispatch('month_name', 'dbt_date') (date, short) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.duckdb__month_name"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6437416, + "supported_languages": null + }, + "macro.dbt_date.default__month_name": { + "name": "default__month_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/month_name.sql", + "original_file_path": "macros/calendar_date/month_name.sql", + "unique_id": "macro.dbt_date.default__month_name", + "macro_sql": "\n\n{%- macro default__month_name(date, short) -%}\n{%- set f = 'MON' if short else 'MONTH' -%}\n to_char({{ date }}, '{{ f }}')\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6440628, + "supported_languages": null + }, + "macro.dbt_date.bigquery__month_name": { + "name": "bigquery__month_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/month_name.sql", + "original_file_path": "macros/calendar_date/month_name.sql", + "unique_id": "macro.dbt_date.bigquery__month_name", + "macro_sql": "\n\n{%- macro bigquery__month_name(date, short) -%}\n{%- set f = '%b' if short else '%B' -%}\n format_date('{{ f }}', cast({{ date }} as date))\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6443732, + "supported_languages": null + }, + "macro.dbt_date.snowflake__month_name": { + "name": "snowflake__month_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/month_name.sql", + "original_file_path": "macros/calendar_date/month_name.sql", + "unique_id": "macro.dbt_date.snowflake__month_name", + "macro_sql": "\n\n{%- macro snowflake__month_name(date, short) -%}\n{%- set f = 'MON' if short else 'MMMM' -%}\n to_char({{ date }}, '{{ f }}')\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6446688, + "supported_languages": null + }, + "macro.dbt_date.postgres__month_name": { + "name": "postgres__month_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/month_name.sql", + "original_file_path": "macros/calendar_date/month_name.sql", + "unique_id": "macro.dbt_date.postgres__month_name", + "macro_sql": "\n\n{%- macro postgres__month_name(date, short) -%}\n{# FM = Fill mode, which suppresses padding blanks #}\n{%- set f = 'FMMon' if short else 'FMMonth' -%}\n to_char({{ date }}, '{{ f }}')\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.645021, + 
"supported_languages": null + }, + "macro.dbt_date.duckdb__month_name": { + "name": "duckdb__month_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/month_name.sql", + "original_file_path": "macros/calendar_date/month_name.sql", + "unique_id": "macro.dbt_date.duckdb__month_name", + "macro_sql": "\n\n\n{%- macro duckdb__month_name(date, short) -%}\n {%- if short -%}\n substr(monthname({{ date }}), 1, 3)\n {%- else -%}\n monthname({{ date }})\n {%- endif -%}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6453152, + "supported_languages": null + }, + "macro.dbt_date.spark__month_name": { + "name": "spark__month_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/month_name.sql", + "original_file_path": "macros/calendar_date/month_name.sql", + "unique_id": "macro.dbt_date.spark__month_name", + "macro_sql": "\n\n{%- macro spark__month_name(date, short) -%}\n{%- set f = 'LLL' if short else 'LLLL' -%}\n date_format({{ date }}, '{{ f }}')\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6456578, + "supported_languages": null + }, + "macro.dbt_date.trino__month_name": { + "name": "trino__month_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/month_name.sql", + "original_file_path": "macros/calendar_date/month_name.sql", + "unique_id": "macro.dbt_date.trino__month_name", + "macro_sql": "\n\n{%- macro trino__month_name(date, short) -%}\n{%- set f = 'b' if short else 'M' -%}\n date_format({{ date }}, '%{{ f }}')\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6459653, + "supported_languages": null + }, + "macro.dbt_date.n_weeks_ago": { + "name": "n_weeks_ago", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/n_weeks_ago.sql", + "original_file_path": "macros/calendar_date/n_weeks_ago.sql", + "unique_id": "macro.dbt_date.n_weeks_ago", + "macro_sql": "{%- macro n_weeks_ago(n, tz=None) -%}\n{%- set n = n|int -%}\n{{ dbt.date_trunc('week',\n dbt.dateadd('week', -1 * n,\n dbt_date.today(tz)\n )\n ) }}\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt.date_trunc", + "macro.dbt.dateadd", + "macro.dbt_date.today" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.646588, + "supported_languages": null + }, + "macro.dbt_date.convert_timezone": { + "name": "convert_timezone", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/convert_timezone.sql", + "original_file_path": "macros/calendar_date/convert_timezone.sql", + "unique_id": "macro.dbt_date.convert_timezone", + "macro_sql": "{%- macro convert_timezone(column, target_tz=None, source_tz=None) -%}\n{%- set source_tz = \"UTC\" if not source_tz else source_tz -%}\n{%- set target_tz = var(\"dbt_date:time_zone\") if not target_tz else target_tz -%}\n{{ adapter.dispatch('convert_timezone', 'dbt_date') (column, target_tz, source_tz) 
}}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_date.duckdb__convert_timezone"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.64819, + "supported_languages": null + }, + "macro.dbt_date.default__convert_timezone": { + "name": "default__convert_timezone", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/convert_timezone.sql", + "original_file_path": "macros/calendar_date/convert_timezone.sql", + "unique_id": "macro.dbt_date.default__convert_timezone", + "macro_sql": "{% macro default__convert_timezone(column, target_tz, source_tz) -%}\nconvert_timezone('{{ source_tz }}', '{{ target_tz }}',\n cast({{ column }} as {{ dbt.type_timestamp() }})\n)\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.type_timestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6486208, + "supported_languages": null + }, + "macro.dbt_date.bigquery__convert_timezone": { + "name": "bigquery__convert_timezone", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/convert_timezone.sql", + "original_file_path": "macros/calendar_date/convert_timezone.sql", + "unique_id": "macro.dbt_date.bigquery__convert_timezone", + "macro_sql": "{%- macro bigquery__convert_timezone(column, target_tz, source_tz=None) -%}\ntimestamp(datetime({{ column }}, '{{ target_tz}}'))\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6488576, + "supported_languages": null + }, + "macro.dbt_date.postgres__convert_timezone": { + "name": "postgres__convert_timezone", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/convert_timezone.sql", + "original_file_path": "macros/calendar_date/convert_timezone.sql", + "unique_id": "macro.dbt_date.postgres__convert_timezone", + "macro_sql": "{% macro postgres__convert_timezone(column, target_tz, source_tz) -%}\ncast(\n cast({{ column }} as {{ dbt.type_timestamp() }})\n at time zone '{{ source_tz }}' at time zone '{{ target_tz }}' as {{ dbt.type_timestamp() }}\n)\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt.type_timestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.649242, + "supported_languages": null + }, + "macro.dbt_date.redshift__convert_timezone": { + "name": "redshift__convert_timezone", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/convert_timezone.sql", + "original_file_path": "macros/calendar_date/convert_timezone.sql", + "unique_id": "macro.dbt_date.redshift__convert_timezone", + "macro_sql": "{%- macro redshift__convert_timezone(column, target_tz, source_tz) -%}\n{{ return(dbt_date.default__convert_timezone(column, target_tz, source_tz)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_date.default__convert_timezone"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6495266, + "supported_languages": null + }, + 
"macro.dbt_date.duckdb__convert_timezone": { + "name": "duckdb__convert_timezone", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/convert_timezone.sql", + "original_file_path": "macros/calendar_date/convert_timezone.sql", + "unique_id": "macro.dbt_date.duckdb__convert_timezone", + "macro_sql": "{% macro duckdb__convert_timezone(column, target_tz, source_tz) -%}\n{{ return(dbt_date.postgres__convert_timezone(column, target_tz, source_tz)) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_date.postgres__convert_timezone"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.649801, + "supported_languages": null + }, + "macro.dbt_date.spark__convert_timezone": { + "name": "spark__convert_timezone", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/convert_timezone.sql", + "original_file_path": "macros/calendar_date/convert_timezone.sql", + "unique_id": "macro.dbt_date.spark__convert_timezone", + "macro_sql": "{%- macro spark__convert_timezone(column, target_tz, source_tz) -%}\nfrom_utc_timestamp(\n to_utc_timestamp({{ column }}, '{{ source_tz }}'),\n '{{ target_tz }}'\n )\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.650044, + "supported_languages": null + }, + "macro.dbt_date.trino__convert_timezone": { + "name": "trino__convert_timezone", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/convert_timezone.sql", + "original_file_path": "macros/calendar_date/convert_timezone.sql", + "unique_id": "macro.dbt_date.trino__convert_timezone", + "macro_sql": "{%- macro trino__convert_timezone(column, target_tz, source_tz) -%}\n cast((at_timezone(with_timezone(cast({{ column }} as {{ dbt.type_timestamp() }}), '{{ source_tz }}'), '{{ target_tz }}')) as {{ dbt.type_timestamp() }})\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt.type_timestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.650394, + "supported_languages": null + }, + "macro.dbt_date.week_start": { + "name": "week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_start.sql", + "original_file_path": "macros/calendar_date/week_start.sql", + "unique_id": "macro.dbt_date.week_start", + "macro_sql": "{%- macro week_start(date=None, tz=None) -%}\n{%-set dt = date if date else dbt_date.today(tz) -%}\n{{ adapter.dispatch('week_start', 'dbt_date') (dt) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_date.today", "macro.dbt_date.duckdb__week_start"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.651125, + "supported_languages": null + }, + "macro.dbt_date.default__week_start": { + "name": "default__week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_start.sql", + "original_file_path": "macros/calendar_date/week_start.sql", + "unique_id": "macro.dbt_date.default__week_start", + "macro_sql": "{%- macro default__week_start(date) -%}\ncast({{ dbt.date_trunc('week', 
date) }} as date)\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.date_trunc"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6513574, + "supported_languages": null + }, + "macro.dbt_date.snowflake__week_start": { + "name": "snowflake__week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_start.sql", + "original_file_path": "macros/calendar_date/week_start.sql", + "unique_id": "macro.dbt_date.snowflake__week_start", + "macro_sql": "\n\n{%- macro snowflake__week_start(date) -%}\n {#\n Get the day of week offset: e.g. if the date is a Sunday,\n dbt_date.day_of_week returns 1, so we subtract 1 to get a 0 offset\n #}\n {% set off_set = dbt_date.day_of_week(date, isoweek=False) ~ \" - 1\" %}\n cast({{ dbt.dateadd(\"day\", \"-1 * (\" ~ off_set ~ \")\", date) }} as date)\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.day_of_week", "macro.dbt.dateadd"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6518512, + "supported_languages": null + }, + "macro.dbt_date.postgres__week_start": { + "name": "postgres__week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_start.sql", + "original_file_path": "macros/calendar_date/week_start.sql", + "unique_id": "macro.dbt_date.postgres__week_start", + "macro_sql": "\n\n{%- macro postgres__week_start(date) -%}\n-- Sunday as week start date\ncast({{ dbt.dateadd('day', -1, dbt.date_trunc('week', dbt.dateadd('day', 1, date))) }} as date)\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.dateadd", "macro.dbt.date_trunc"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.652329, + "supported_languages": null + }, + "macro.dbt_date.duckdb__week_start": { + "name": "duckdb__week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_start.sql", + "original_file_path": "macros/calendar_date/week_start.sql", + "unique_id": "macro.dbt_date.duckdb__week_start", + "macro_sql": "\n\n{%- macro duckdb__week_start(date) -%}\n{{ return(dbt_date.postgres__week_start(date)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.postgres__week_start"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.652651, + "supported_languages": null + }, + "macro.dbt_date.last_week": { + "name": "last_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/last_week.sql", + "original_file_path": "macros/calendar_date/last_week.sql", + "unique_id": "macro.dbt_date.last_week", + "macro_sql": "{%- macro last_week(tz=None) -%}\n{{ dbt_date.n_weeks_ago(1, tz) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.n_weeks_ago"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6530573, + "supported_languages": null + }, + "macro.dbt_date.next_month_name": { + "name": "next_month_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": 
"macros/calendar_date/next_month_name.sql", + "original_file_path": "macros/calendar_date/next_month_name.sql", + "unique_id": "macro.dbt_date.next_month_name", + "macro_sql": "{%- macro next_month_name(short=True, tz=None) -%}\n{{ dbt_date.month_name(dbt_date.next_month(tz), short=short) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.month_name", "macro.dbt_date.next_month"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6534498, + "supported_languages": null + }, + "macro.dbt_date.last_month_number": { + "name": "last_month_number", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/last_month_number.sql", + "original_file_path": "macros/calendar_date/last_month_number.sql", + "unique_id": "macro.dbt_date.last_month_number", + "macro_sql": "{%- macro last_month_number(tz=None) -%}\n{{ dbt_date.date_part('month', dbt_date.last_month(tz)) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.date_part", "macro.dbt_date.last_month"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6537983, + "supported_languages": null + }, + "macro.dbt_date.now": { + "name": "now", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/now.sql", + "original_file_path": "macros/calendar_date/now.sql", + "unique_id": "macro.dbt_date.now", + "macro_sql": "{%- macro now(tz=None) -%}\n{{ dbt_date.convert_timezone(dbt.current_timestamp(), tz) }}\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt_date.convert_timezone", + "macro.dbt.current_timestamp" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6540995, + "supported_languages": null + }, + "macro.dbt_date.today": { + "name": "today", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/today.sql", + "original_file_path": "macros/calendar_date/today.sql", + "unique_id": "macro.dbt_date.today", + "macro_sql": "{%- macro today(tz=None) -%}\ncast({{ dbt_date.now(tz) }} as date)\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.now"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6544058, + "supported_languages": null + }, + "macro.dbt_date.day_name": { + "name": "day_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_name.sql", + "original_file_path": "macros/calendar_date/day_name.sql", + "unique_id": "macro.dbt_date.day_name", + "macro_sql": "{%- macro day_name(date, short=True) -%}\n {{ adapter.dispatch('day_name', 'dbt_date') (date, short) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.duckdb__day_name"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.655396, + "supported_languages": null + }, + "macro.dbt_date.default__day_name": { + "name": "default__day_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_name.sql", + "original_file_path": "macros/calendar_date/day_name.sql", + "unique_id": 
"macro.dbt_date.default__day_name", + "macro_sql": "\n\n{%- macro default__day_name(date, short) -%}\n{%- set f = 'Dy' if short else 'Day' -%}\n to_char({{ date }}, '{{ f }}')\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6556993, + "supported_languages": null + }, + "macro.dbt_date.snowflake__day_name": { + "name": "snowflake__day_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_name.sql", + "original_file_path": "macros/calendar_date/day_name.sql", + "unique_id": "macro.dbt_date.snowflake__day_name", + "macro_sql": "\n\n{%- macro snowflake__day_name(date, short) -%}\n {%- if short -%}\n dayname({{ date }})\n {%- else -%}\n -- long version not implemented on Snowflake so we're doing it manually :/\n case dayname({{ date }})\n when 'Mon' then 'Monday'\n when 'Tue' then 'Tuesday'\n when 'Wed' then 'Wednesday'\n when 'Thu' then 'Thursday'\n when 'Fri' then 'Friday'\n when 'Sat' then 'Saturday'\n when 'Sun' then 'Sunday'\n end\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.65607, + "supported_languages": null + }, + "macro.dbt_date.bigquery__day_name": { + "name": "bigquery__day_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_name.sql", + "original_file_path": "macros/calendar_date/day_name.sql", + "unique_id": "macro.dbt_date.bigquery__day_name", + "macro_sql": "\n\n{%- macro bigquery__day_name(date, short) -%}\n{%- set f = '%a' if short else '%A' -%}\n format_date('{{ f }}', cast({{ date }} as date))\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6565127, + "supported_languages": null + }, + "macro.dbt_date.postgres__day_name": { + "name": "postgres__day_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_name.sql", + "original_file_path": "macros/calendar_date/day_name.sql", + "unique_id": "macro.dbt_date.postgres__day_name", + "macro_sql": "\n\n{%- macro postgres__day_name(date, short) -%}\n{# FM = Fill mode, which suppresses padding blanks #}\n{%- set f = 'FMDy' if short else 'FMDay' -%}\n to_char({{ date }}, '{{ f }}')\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6568248, + "supported_languages": null + }, + "macro.dbt_date.duckdb__day_name": { + "name": "duckdb__day_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_name.sql", + "original_file_path": "macros/calendar_date/day_name.sql", + "unique_id": "macro.dbt_date.duckdb__day_name", + "macro_sql": "\n\n{%- macro duckdb__day_name(date, short) -%}\n {%- if short -%}\n substr(dayname({{ date }}), 1, 3)\n {%- else -%}\n dayname({{ date }})\n {%- endif -%}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 
1711458072.6571267, + "supported_languages": null + }, + "macro.dbt_date.spark__day_name": { + "name": "spark__day_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_name.sql", + "original_file_path": "macros/calendar_date/day_name.sql", + "unique_id": "macro.dbt_date.spark__day_name", + "macro_sql": "\n\n{%- macro spark__day_name(date, short) -%}\n{%- set f = 'E' if short else 'EEEE' -%}\n date_format({{ date }}, '{{ f }}')\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6575196, + "supported_languages": null + }, + "macro.dbt_date.trino__day_name": { + "name": "trino__day_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_name.sql", + "original_file_path": "macros/calendar_date/day_name.sql", + "unique_id": "macro.dbt_date.trino__day_name", + "macro_sql": "\n\n{%- macro trino__day_name(date, short) -%}\n{%- set f = 'a' if short else 'W' -%}\n date_format({{ date }}, '%{{ f }}')\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6578743, + "supported_languages": null + }, + "macro.dbt_date.date_part": { + "name": "date_part", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/date_part.sql", + "original_file_path": "macros/calendar_date/date_part.sql", + "unique_id": "macro.dbt_date.date_part", + "macro_sql": "{% macro date_part(datepart, date) -%}\n {{ adapter.dispatch('date_part', 'dbt_date') (datepart, date) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6583927, + "supported_languages": null + }, + "macro.dbt_date.default__date_part": { + "name": "default__date_part", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/date_part.sql", + "original_file_path": "macros/calendar_date/date_part.sql", + "unique_id": "macro.dbt_date.default__date_part", + "macro_sql": "{% macro default__date_part(datepart, date) -%}\n date_part('{{ datepart }}', {{ date }})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.658595, + "supported_languages": null + }, + "macro.dbt_date.bigquery__date_part": { + "name": "bigquery__date_part", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/date_part.sql", + "original_file_path": "macros/calendar_date/date_part.sql", + "unique_id": "macro.dbt_date.bigquery__date_part", + "macro_sql": "{% macro bigquery__date_part(datepart, date) -%}\n extract({{ datepart }} from {{ date }})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.658794, + "supported_languages": null + }, + "macro.dbt_date.trino__date_part": { + "name": "trino__date_part", + "resource_type": "macro", + "package_name": "dbt_date", + "path": 
"macros/calendar_date/date_part.sql", + "original_file_path": "macros/calendar_date/date_part.sql", + "unique_id": "macro.dbt_date.trino__date_part", + "macro_sql": "{% macro trino__date_part(datepart, date) -%}\n extract({{ datepart }} from {{ date }})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6589913, + "supported_languages": null + }, + "macro.dbt_date.last_month": { + "name": "last_month", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/last_month.sql", + "original_file_path": "macros/calendar_date/last_month.sql", + "unique_id": "macro.dbt_date.last_month", + "macro_sql": "{%- macro last_month(tz=None) -%}\n{{ dbt_date.n_months_ago(1, tz) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.n_months_ago"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6593401, + "supported_languages": null + }, + "macro.dbt_date.tomorrow": { + "name": "tomorrow", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/tomorrow.sql", + "original_file_path": "macros/calendar_date/tomorrow.sql", + "unique_id": "macro.dbt_date.tomorrow", + "macro_sql": "{%- macro tomorrow(date=None, tz=None) -%}\n{{ dbt_date.n_days_away(1, date, tz) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.n_days_away"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6596854, + "supported_languages": null + }, + "macro.dbt_date.iso_week_start": { + "name": "iso_week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_start.sql", + "original_file_path": "macros/calendar_date/iso_week_start.sql", + "unique_id": "macro.dbt_date.iso_week_start", + "macro_sql": "{%- macro iso_week_start(date=None, tz=None) -%}\n{%-set dt = date if date else dbt_date.today(tz) -%}\n{{ adapter.dispatch('iso_week_start', 'dbt_date') (dt) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_date.today", + "macro.dbt_date.duckdb__iso_week_start" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6605382, + "supported_languages": null + }, + "macro.dbt_date._iso_week_start": { + "name": "_iso_week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_start.sql", + "original_file_path": "macros/calendar_date/iso_week_start.sql", + "unique_id": "macro.dbt_date._iso_week_start", + "macro_sql": "{%- macro _iso_week_start(date, week_type) -%}\ncast({{ dbt.date_trunc(week_type, date) }} as date)\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.date_trunc"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.660775, + "supported_languages": null + }, + "macro.dbt_date.default__iso_week_start": { + "name": "default__iso_week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_start.sql", + "original_file_path": 
"macros/calendar_date/iso_week_start.sql", + "unique_id": "macro.dbt_date.default__iso_week_start", + "macro_sql": "\n\n{%- macro default__iso_week_start(date) -%}\n{{ dbt_date._iso_week_start(date, 'isoweek') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_start"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6609864, + "supported_languages": null + }, + "macro.dbt_date.snowflake__iso_week_start": { + "name": "snowflake__iso_week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_start.sql", + "original_file_path": "macros/calendar_date/iso_week_start.sql", + "unique_id": "macro.dbt_date.snowflake__iso_week_start", + "macro_sql": "\n\n{%- macro snowflake__iso_week_start(date) -%}\n{{ dbt_date._iso_week_start(date, 'week') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_start"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6612356, + "supported_languages": null + }, + "macro.dbt_date.postgres__iso_week_start": { + "name": "postgres__iso_week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_start.sql", + "original_file_path": "macros/calendar_date/iso_week_start.sql", + "unique_id": "macro.dbt_date.postgres__iso_week_start", + "macro_sql": "\n\n{%- macro postgres__iso_week_start(date) -%}\n{{ dbt_date._iso_week_start(date, 'week') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_start"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6614513, + "supported_languages": null + }, + "macro.dbt_date.duckdb__iso_week_start": { + "name": "duckdb__iso_week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_start.sql", + "original_file_path": "macros/calendar_date/iso_week_start.sql", + "unique_id": "macro.dbt_date.duckdb__iso_week_start", + "macro_sql": "\n\n{%- macro duckdb__iso_week_start(date) -%}\n{{ return(dbt_date.postgres__iso_week_start(date)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.postgres__iso_week_start"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6616724, + "supported_languages": null + }, + "macro.dbt_date.spark__iso_week_start": { + "name": "spark__iso_week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/iso_week_start.sql", + "original_file_path": "macros/calendar_date/iso_week_start.sql", + "unique_id": "macro.dbt_date.spark__iso_week_start", + "macro_sql": "\n\n{%- macro spark__iso_week_start(date) -%}\n{{ dbt_date._iso_week_start(date, 'week') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_start"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6618834, + "supported_languages": null + }, + "macro.dbt_date.trino__iso_week_start": { + "name": "trino__iso_week_start", + "resource_type": "macro", + "package_name": "dbt_date", + "path": 
"macros/calendar_date/iso_week_start.sql", + "original_file_path": "macros/calendar_date/iso_week_start.sql", + "unique_id": "macro.dbt_date.trino__iso_week_start", + "macro_sql": "\n\n{%- macro trino__iso_week_start(date) -%}\n{{ dbt_date._iso_week_start(date, 'week') }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date._iso_week_start"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.662092, + "supported_languages": null + }, + "macro.dbt_date.periods_since": { + "name": "periods_since", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/periods_since.sql", + "original_file_path": "macros/calendar_date/periods_since.sql", + "unique_id": "macro.dbt_date.periods_since", + "macro_sql": "{%- macro periods_since(date_col, period_name='day', tz=None) -%}\n{{ dbt.datediff(date_col, dbt_date.now(tz), period_name) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt.datediff", "macro.dbt_date.now"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6625016, + "supported_languages": null + }, + "macro.dbt_date.n_days_away": { + "name": "n_days_away", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/n_days_away.sql", + "original_file_path": "macros/calendar_date/n_days_away.sql", + "unique_id": "macro.dbt_date.n_days_away", + "macro_sql": "{%- macro n_days_away(n, date=None, tz=None) -%}\n{{ dbt_date.n_days_ago(-1 * n, date, tz) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.n_days_ago"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6628988, + "supported_languages": null + }, + "macro.dbt_date.day_of_week": { + "name": "day_of_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_week.sql", + "original_file_path": "macros/calendar_date/day_of_week.sql", + "unique_id": "macro.dbt_date.day_of_week", + "macro_sql": "{%- macro day_of_week(date, isoweek=true) -%}\n{{ adapter.dispatch('day_of_week', 'dbt_date') (date, isoweek) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.duckdb__day_of_week"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6658263, + "supported_languages": null + }, + "macro.dbt_date.default__day_of_week": { + "name": "default__day_of_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_week.sql", + "original_file_path": "macros/calendar_date/day_of_week.sql", + "unique_id": "macro.dbt_date.default__day_of_week", + "macro_sql": "\n\n{%- macro default__day_of_week(date, isoweek) -%}\n\n {%- set dow = dbt_date.date_part('dayofweek', date) -%}\n\n {%- if isoweek -%}\n case\n -- Shift start of week from Sunday (0) to Monday (1)\n when {{ dow }} = 0 then 7\n else {{ dow }}\n end\n {%- else -%}\n {{ dow }} + 1\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6662717, + "supported_languages": 
null + }, + "macro.dbt_date.snowflake__day_of_week": { + "name": "snowflake__day_of_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_week.sql", + "original_file_path": "macros/calendar_date/day_of_week.sql", + "unique_id": "macro.dbt_date.snowflake__day_of_week", + "macro_sql": "\n\n{%- macro snowflake__day_of_week(date, isoweek) -%}\n\n {%- if isoweek -%}\n {%- set dow_part = 'dayofweekiso' -%}\n {{ dbt_date.date_part(dow_part, date) }}\n {%- else -%}\n {%- set dow_part = 'dayofweek' -%}\n case\n when {{ dbt_date.date_part(dow_part, date) }} = 7 then 1\n else {{ dbt_date.date_part(dow_part, date) }} + 1\n end\n {%- endif -%}\n\n\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6669052, + "supported_languages": null + }, + "macro.dbt_date.bigquery__day_of_week": { + "name": "bigquery__day_of_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_week.sql", + "original_file_path": "macros/calendar_date/day_of_week.sql", + "unique_id": "macro.dbt_date.bigquery__day_of_week", + "macro_sql": "\n\n{%- macro bigquery__day_of_week(date, isoweek) -%}\n\n {%- set dow = dbt_date.date_part('dayofweek', date) -%}\n\n {%- if isoweek -%}\n case\n -- Shift start of week from Sunday (1) to Monday (2)\n when {{ dow }} = 1 then 7\n else {{ dow }} - 1\n end\n {%- else -%}\n {{ dow }}\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.667337, + "supported_languages": null + }, + "macro.dbt_date.postgres__day_of_week": { + "name": "postgres__day_of_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_week.sql", + "original_file_path": "macros/calendar_date/day_of_week.sql", + "unique_id": "macro.dbt_date.postgres__day_of_week", + "macro_sql": "\n\n\n{%- macro postgres__day_of_week(date, isoweek) -%}\n\n {%- if isoweek -%}\n {%- set dow_part = 'isodow' -%}\n -- Monday(1) to Sunday (7)\n cast({{ dbt_date.date_part(dow_part, date) }} as {{ dbt.type_int() }})\n {%- else -%}\n {%- set dow_part = 'dow' -%}\n -- Sunday(1) to Saturday (7)\n cast({{ dbt_date.date_part(dow_part, date) }} + 1 as {{ dbt.type_int() }})\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part", "macro.dbt.type_int"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6680436, + "supported_languages": null + }, + "macro.dbt_date.redshift__day_of_week": { + "name": "redshift__day_of_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_week.sql", + "original_file_path": "macros/calendar_date/day_of_week.sql", + "unique_id": "macro.dbt_date.redshift__day_of_week", + "macro_sql": "\n\n\n{%- macro redshift__day_of_week(date, isoweek) -%}\n\n {%- set dow = dbt_date.date_part('dayofweek', date) -%}\n\n {%- if isoweek -%}\n case\n -- Shift start of week from Sunday (0) to Monday (1)\n when {{ dow }} = 0 then 7\n else cast({{ dow }} as {{ dbt.type_bigint() }})\n end\n {%- else -%}\n cast({{ dow }} + 1 as {{ 
dbt.type_bigint() }})\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part", "macro.dbt.type_bigint"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6686776, + "supported_languages": null + }, + "macro.dbt_date.duckdb__day_of_week": { + "name": "duckdb__day_of_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_week.sql", + "original_file_path": "macros/calendar_date/day_of_week.sql", + "unique_id": "macro.dbt_date.duckdb__day_of_week", + "macro_sql": "\n\n{%- macro duckdb__day_of_week(date, isoweek) -%}\n{{ return(dbt_date.postgres__day_of_week(date, isoweek)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.postgres__day_of_week"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6690116, + "supported_languages": null + }, + "macro.dbt_date.spark__day_of_week": { + "name": "spark__day_of_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_week.sql", + "original_file_path": "macros/calendar_date/day_of_week.sql", + "unique_id": "macro.dbt_date.spark__day_of_week", + "macro_sql": "\n\n\n{%- macro spark__day_of_week(date, isoweek) -%}\n\n {%- set dow = \"dayofweek_iso\" if isoweek else \"dayofweek\" -%}\n\n {{ dbt_date.date_part(dow, date) }}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6693716, + "supported_languages": null + }, + "macro.dbt_date.trino__day_of_week": { + "name": "trino__day_of_week", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/day_of_week.sql", + "original_file_path": "macros/calendar_date/day_of_week.sql", + "unique_id": "macro.dbt_date.trino__day_of_week", + "macro_sql": "\n\n\n{%- macro trino__day_of_week(date, isoweek) -%}\n\n {%- set dow = dbt_date.date_part('day_of_week', date) -%}\n\n {%- if isoweek -%}\n {{ dow }}\n {%- else -%}\n case\n when {{ dow }} = 7 then 1\n else {{ dow }} + 1\n end\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.669869, + "supported_languages": null + }, + "macro.dbt_date.next_month_number": { + "name": "next_month_number", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/next_month_number.sql", + "original_file_path": "macros/calendar_date/next_month_number.sql", + "unique_id": "macro.dbt_date.next_month_number", + "macro_sql": "{%- macro next_month_number(tz=None) -%}\n{{ dbt_date.date_part('month', dbt_date.next_month(tz)) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.date_part", "macro.dbt_date.next_month"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6703598, + "supported_languages": null + }, + "macro.dbt_date.from_unixtimestamp": { + "name": "from_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + 
"path": "macros/calendar_date/from_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/from_unixtimestamp.sql", + "unique_id": "macro.dbt_date.from_unixtimestamp", + "macro_sql": "{%- macro from_unixtimestamp(epochs, format=\"seconds\") -%}\n {{ adapter.dispatch('from_unixtimestamp', 'dbt_date') (epochs, format) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__from_unixtimestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.672868, + "supported_languages": null + }, + "macro.dbt_date.default__from_unixtimestamp": { + "name": "default__from_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/from_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/from_unixtimestamp.sql", + "unique_id": "macro.dbt_date.default__from_unixtimestamp", + "macro_sql": "\n\n{%- macro default__from_unixtimestamp(epochs, format=\"seconds\") -%}\n {%- if format != \"seconds\" -%}\n {{ exceptions.raise_compiler_error(\n \"value \" ~ format ~ \" for `format` for from_unixtimestamp is not supported.\"\n )\n }}\n {% endif -%}\n to_timestamp({{ epochs }})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6732945, + "supported_languages": null + }, + "macro.dbt_date.postgres__from_unixtimestamp": { + "name": "postgres__from_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/from_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/from_unixtimestamp.sql", + "unique_id": "macro.dbt_date.postgres__from_unixtimestamp", + "macro_sql": "\n\n{%- macro postgres__from_unixtimestamp(epochs, format=\"seconds\") -%}\n {%- if format != \"seconds\" -%}\n {{ exceptions.raise_compiler_error(\n \"value \" ~ format ~ \" for `format` for from_unixtimestamp is not supported.\"\n )\n }}\n {% endif -%}\n cast(to_timestamp({{ epochs }}) at time zone 'UTC' as timestamp)\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6737351, + "supported_languages": null + }, + "macro.dbt_date.snowflake__from_unixtimestamp": { + "name": "snowflake__from_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/from_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/from_unixtimestamp.sql", + "unique_id": "macro.dbt_date.snowflake__from_unixtimestamp", + "macro_sql": "\n\n{%- macro snowflake__from_unixtimestamp(epochs, format) -%}\n {%- if format == \"seconds\" -%}\n {%- set scale = 0 -%}\n {%- elif format == \"milliseconds\" -%}\n {%- set scale = 3 -%}\n {%- elif format == \"microseconds\" -%}\n {%- set scale = 6 -%}\n {%- else -%}\n {{ exceptions.raise_compiler_error(\n \"value \" ~ format ~ \" for `format` for from_unixtimestamp is not supported.\"\n )\n }}\n {% endif -%}\n to_timestamp_ntz({{ epochs }}, {{ scale }})\n\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6744602, + "supported_languages": null + }, + 
"macro.dbt_date.bigquery__from_unixtimestamp": { + "name": "bigquery__from_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/from_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/from_unixtimestamp.sql", + "unique_id": "macro.dbt_date.bigquery__from_unixtimestamp", + "macro_sql": "\n\n{%- macro bigquery__from_unixtimestamp(epochs, format) -%}\n {%- if format == \"seconds\" -%}\n timestamp_seconds({{ epochs }})\n {%- elif format == \"milliseconds\" -%}\n timestamp_millis({{ epochs }})\n {%- elif format == \"microseconds\" -%}\n timestamp_micros({{ epochs }})\n {%- else -%}\n {{ exceptions.raise_compiler_error(\n \"value \" ~ format ~ \" for `format` for from_unixtimestamp is not supported.\"\n )\n }}\n {% endif -%}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6750882, + "supported_languages": null + }, + "macro.dbt_date.trino__from_unixtimestamp": { + "name": "trino__from_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/from_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/from_unixtimestamp.sql", + "unique_id": "macro.dbt_date.trino__from_unixtimestamp", + "macro_sql": "\n\n{%- macro trino__from_unixtimestamp(epochs, format) -%}\n {%- if format == \"seconds\" -%}\n cast(from_unixtime({{ epochs }}) AT TIME ZONE 'UTC' as {{ dbt.type_timestamp() }})\n {%- elif format == \"milliseconds\" -%}\n cast(from_unixtime_nanos({{ epochs }} * pow(10, 6)) AT TIME ZONE 'UTC' as {{ dbt.type_timestamp() }})\n {%- elif format == \"microseconds\" -%}\n cast(from_unixtime_nanos({{ epochs }} * pow(10, 3)) AT TIME ZONE 'UTC' as {{ dbt.type_timestamp() }})\n {%- elif format == \"nanoseconds\" -%}\n cast(from_unixtime_nanos({{ epochs }}) AT TIME ZONE 'UTC' as {{ dbt.type_timestamp() }})\n {%- else -%}\n {{ exceptions.raise_compiler_error(\n \"value \" ~ format ~ \" for `format` for from_unixtimestamp is not supported.\"\n )\n }}\n {% endif -%}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.type_timestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6761453, + "supported_languages": null + }, + "macro.dbt_date.n_days_ago": { + "name": "n_days_ago", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/n_days_ago.sql", + "original_file_path": "macros/calendar_date/n_days_ago.sql", + "unique_id": "macro.dbt_date.n_days_ago", + "macro_sql": "{%- macro n_days_ago(n, date=None, tz=None) -%}\n{%-set dt = date if date else dbt_date.today(tz) -%}\n{%- set n = n|int -%}\ncast({{ dbt.dateadd('day', -1 * n, dt) }} as date)\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.today", "macro.dbt.dateadd"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6769214, + "supported_languages": null + }, + "macro.dbt_date.yesterday": { + "name": "yesterday", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/yesterday.sql", + "original_file_path": "macros/calendar_date/yesterday.sql", + "unique_id": "macro.dbt_date.yesterday", + "macro_sql": "{%- macro yesterday(date=None, tz=None) -%}\n{{ 
dbt_date.n_days_ago(1, date, tz) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.n_days_ago"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.677261, + "supported_languages": null + }, + "macro.dbt_date.n_months_ago": { + "name": "n_months_ago", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/n_months_ago.sql", + "original_file_path": "macros/calendar_date/n_months_ago.sql", + "unique_id": "macro.dbt_date.n_months_ago", + "macro_sql": "{%- macro n_months_ago(n, tz=None) -%}\n{%- set n = n|int -%}\n{{ dbt.date_trunc('month',\n dbt.dateadd('month', -1 * n,\n dbt_date.today(tz)\n )\n ) }}\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt.date_trunc", + "macro.dbt.dateadd", + "macro.dbt_date.today" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6778407, + "supported_languages": null + }, + "macro.dbt_date.n_months_away": { + "name": "n_months_away", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/n_months_away.sql", + "original_file_path": "macros/calendar_date/n_months_away.sql", + "unique_id": "macro.dbt_date.n_months_away", + "macro_sql": "{%- macro n_months_away(n, tz=None) -%}\n{%- set n = n|int -%}\n{{ dbt.date_trunc('month',\n dbt.dateadd('month', n,\n dbt_date.today(tz)\n )\n ) }}\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt.date_trunc", + "macro.dbt.dateadd", + "macro.dbt_date.today" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6783557, + "supported_languages": null + }, + "macro.dbt_date.last_month_name": { + "name": "last_month_name", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/last_month_name.sql", + "original_file_path": "macros/calendar_date/last_month_name.sql", + "unique_id": "macro.dbt_date.last_month_name", + "macro_sql": "{%- macro last_month_name(short=True, tz=None) -%}\n{{ dbt_date.month_name(dbt_date.last_month(tz), short=short) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.month_name", "macro.dbt_date.last_month"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6787455, + "supported_languages": null + }, + "macro.dbt_date.to_unixtimestamp": { + "name": "to_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/to_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/to_unixtimestamp.sql", + "unique_id": "macro.dbt_date.to_unixtimestamp", + "macro_sql": "{%- macro to_unixtimestamp(timestamp) -%}\n {{ adapter.dispatch('to_unixtimestamp', 'dbt_date') (timestamp) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__to_unixtimestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6792743, + "supported_languages": null + }, + "macro.dbt_date.default__to_unixtimestamp": { + "name": "default__to_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": 
"macros/calendar_date/to_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/to_unixtimestamp.sql", + "unique_id": "macro.dbt_date.default__to_unixtimestamp", + "macro_sql": "\n\n{%- macro default__to_unixtimestamp(timestamp) -%}\n {{ dbt_date.date_part('epoch', timestamp) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6794868, + "supported_languages": null + }, + "macro.dbt_date.snowflake__to_unixtimestamp": { + "name": "snowflake__to_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/to_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/to_unixtimestamp.sql", + "unique_id": "macro.dbt_date.snowflake__to_unixtimestamp", + "macro_sql": "\n\n{%- macro snowflake__to_unixtimestamp(timestamp) -%}\n {{ dbt_date.date_part('epoch_seconds', timestamp) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6796944, + "supported_languages": null + }, + "macro.dbt_date.bigquery__to_unixtimestamp": { + "name": "bigquery__to_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/to_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/to_unixtimestamp.sql", + "unique_id": "macro.dbt_date.bigquery__to_unixtimestamp", + "macro_sql": "\n\n{%- macro bigquery__to_unixtimestamp(timestamp) -%}\n unix_seconds({{ timestamp }})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6798546, + "supported_languages": null + }, + "macro.dbt_date.spark__to_unixtimestamp": { + "name": "spark__to_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/to_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/to_unixtimestamp.sql", + "unique_id": "macro.dbt_date.spark__to_unixtimestamp", + "macro_sql": "\n\n{%- macro spark__to_unixtimestamp(timestamp) -%}\n unix_timestamp({{ timestamp }})\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6800137, + "supported_languages": null + }, + "macro.dbt_date.trino__to_unixtimestamp": { + "name": "trino__to_unixtimestamp", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/to_unixtimestamp.sql", + "original_file_path": "macros/calendar_date/to_unixtimestamp.sql", + "unique_id": "macro.dbt_date.trino__to_unixtimestamp", + "macro_sql": "\n\n{%- macro trino__to_unixtimestamp(timestamp) -%}\n to_unixtime({{ timestamp }} AT TIME ZONE 'UTC')\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6801682, + "supported_languages": null + }, + "macro.dbt_date.week_of_year": { + "name": "week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": 
"macros/calendar_date/week_of_year.sql", + "original_file_path": "macros/calendar_date/week_of_year.sql", + "unique_id": "macro.dbt_date.week_of_year", + "macro_sql": "{%- macro week_of_year(date=None, tz=None) -%}\n{%-set dt = date if date else dbt_date.today(tz) -%}\n{{ adapter.dispatch('week_of_year', 'dbt_date') (dt) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [ + "macro.dbt_date.today", + "macro.dbt_date.duckdb__week_of_year" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6808453, + "supported_languages": null + }, + "macro.dbt_date.default__week_of_year": { + "name": "default__week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_of_year.sql", + "original_file_path": "macros/calendar_date/week_of_year.sql", + "unique_id": "macro.dbt_date.default__week_of_year", + "macro_sql": "{%- macro default__week_of_year(date) -%}\ncast({{ dbt_date.date_part('week', date) }} as {{ dbt.type_int() }})\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.date_part", "macro.dbt.type_int"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.681122, + "supported_languages": null + }, + "macro.dbt_date.postgres__week_of_year": { + "name": "postgres__week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_of_year.sql", + "original_file_path": "macros/calendar_date/week_of_year.sql", + "unique_id": "macro.dbt_date.postgres__week_of_year", + "macro_sql": "\n\n{%- macro postgres__week_of_year(date) -%}\n{# postgresql 'week' returns isoweek. 
Use to_char instead.\n WW = the first week starts on the first day of the year #}\ncast(to_char({{ date }}, 'WW') as {{ dbt.type_int() }})\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.type_int"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6813743, + "supported_languages": null + }, + "macro.dbt_date.duckdb__week_of_year": { + "name": "duckdb__week_of_year", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/week_of_year.sql", + "original_file_path": "macros/calendar_date/week_of_year.sql", + "unique_id": "macro.dbt_date.duckdb__week_of_year", + "macro_sql": "\n\n{%- macro duckdb__week_of_year(date) -%}\ncast(ceil(dayofyear({{ date }}) / 7) as int)\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6815426, + "supported_languages": null + }, + "macro.dbt_date.next_month": { + "name": "next_month", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/next_month.sql", + "original_file_path": "macros/calendar_date/next_month.sql", + "unique_id": "macro.dbt_date.next_month", + "macro_sql": "{%- macro next_month(tz=None) -%}\n{{ dbt_date.n_months_away(1, tz) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_date.n_months_away"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.681812, + "supported_languages": null + }, + "macro.dbt_date.n_weeks_away": { + "name": "n_weeks_away", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/calendar_date/n_weeks_away.sql", + "original_file_path": "macros/calendar_date/n_weeks_away.sql", + "unique_id": "macro.dbt_date.n_weeks_away", + "macro_sql": "{%- macro n_weeks_away(n, tz=None) -%}\n{%- set n = n|int -%}\n{{ dbt.date_trunc('week',\n dbt.dateadd('week', n,\n dbt_date.today(tz)\n )\n ) }}\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt.date_trunc", + "macro.dbt.dateadd", + "macro.dbt_date.today" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6823452, + "supported_languages": null + }, + "macro.dbt_date.get_fiscal_periods": { + "name": "get_fiscal_periods", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/fiscal_date/get_fiscal_periods.sql", + "original_file_path": "macros/fiscal_date/get_fiscal_periods.sql", + "unique_id": "macro.dbt_date.get_fiscal_periods", + "macro_sql": "{% macro get_fiscal_periods(dates, year_end_month, week_start_day, shift_year=1) %}\n{#\nThis macro requires you to pass in a ref to a date dimension, created via\ndbt_date.get_date_dimension()s\n#}\nwith fscl_year_dates_for_periods as (\n {{ dbt_date.get_fiscal_year_dates(dates, year_end_month, week_start_day, shift_year) }}\n),\nfscl_year_w13 as (\n\n select\n f.*,\n -- We count the weeks in a 13 week period\n -- and separate the 4-5-4 week sequences\n mod(cast(\n (f.fiscal_week_of_year-1) as {{ dbt.type_int() }}\n ), 13) as w13_number,\n -- Chop weeks into 13 week merch quarters\n cast(\n least(\n floor((f.fiscal_week_of_year-1)/13.0)\n , 3)\n as {{ dbt.type_int() }}) as quarter_number\n from\n 
fscl_year_dates_for_periods f\n\n),\nfscl_periods as (\n\n select\n f.date_day,\n f.fiscal_year_number,\n f.week_start_date,\n f.week_end_date,\n f.fiscal_week_of_year,\n case\n -- we move week 53 into the 3rd period of the quarter\n when f.fiscal_week_of_year = 53 then 3\n when f.w13_number between 0 and 3 then 1\n when f.w13_number between 4 and 8 then 2\n when f.w13_number between 9 and 12 then 3\n end as period_of_quarter,\n f.quarter_number\n from\n fscl_year_w13 f\n\n),\nfscl_periods_quarters as (\n\n select\n f.*,\n cast((\n (f.quarter_number * 3) + f.period_of_quarter\n ) as {{ dbt.type_int() }}) as fiscal_period_number\n from\n fscl_periods f\n\n)\nselect\n date_day,\n fiscal_year_number,\n week_start_date,\n week_end_date,\n fiscal_week_of_year,\n dense_rank() over(partition by fiscal_period_number order by fiscal_week_of_year) as fiscal_week_of_period,\n fiscal_period_number,\n quarter_number+1 as fiscal_quarter_number,\n period_of_quarter as fiscal_period_of_quarter\nfrom\n fscl_periods_quarters\norder by 1,2\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.get_fiscal_year_dates", "macro.dbt.type_int"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6833506, + "supported_languages": null + }, + "macro.dbt_date.get_fiscal_year_dates": { + "name": "get_fiscal_year_dates", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/fiscal_date/get_fiscal_year_dates.sql", + "original_file_path": "macros/fiscal_date/get_fiscal_year_dates.sql", + "unique_id": "macro.dbt_date.get_fiscal_year_dates", + "macro_sql": "{% macro get_fiscal_year_dates(dates, year_end_month=12, week_start_day=1, shift_year=1) %}\n{{ adapter.dispatch('get_fiscal_year_dates', 'dbt_date') (dates, year_end_month, week_start_day, shift_year) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__get_fiscal_year_dates"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6851087, + "supported_languages": null + }, + "macro.dbt_date.default__get_fiscal_year_dates": { + "name": "default__get_fiscal_year_dates", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/fiscal_date/get_fiscal_year_dates.sql", + "original_file_path": "macros/fiscal_date/get_fiscal_year_dates.sql", + "unique_id": "macro.dbt_date.default__get_fiscal_year_dates", + "macro_sql": "{% macro default__get_fiscal_year_dates(dates, year_end_month, week_start_day, shift_year) %}\n-- this gets all the dates within a fiscal year\n-- determined by the given year-end-month\n-- ending on the saturday closest to that month's end date\nwith fsc_date_dimension as (\n select * from {{ dates }}\n),\nyear_month_end as (\n\n select\n d.year_number - {{ shift_year }} as fiscal_year_number,\n d.month_end_date\n from\n fsc_date_dimension d\n where\n d.month_of_year = {{ year_end_month }}\n group by 1,2\n\n),\nweeks as (\n\n select\n d.year_number,\n d.month_of_year,\n d.date_day as week_start_date,\n cast({{ dbt.dateadd('day', 6, 'd.date_day') }} as date) as week_end_date\n from\n fsc_date_dimension d\n where\n d.day_of_week = {{ week_start_day }}\n\n),\n-- get all the weeks that start in the month the year ends\nyear_week_ends as (\n\n select\n d.year_number - {{ shift_year }} as fiscal_year_number,\n d.week_end_date\n from\n weeks d\n where\n d.month_of_year = {{ 
year_end_month }}\n group by\n 1,2\n\n),\n-- then calculate which Saturday is closest to month end\nweeks_at_month_end as (\n\n select\n d.fiscal_year_number,\n d.week_end_date,\n m.month_end_date,\n rank() over\n (partition by d.fiscal_year_number\n order by\n abs({{ dbt.datediff('d.week_end_date', 'm.month_end_date', 'day') }})\n\n ) as closest_to_month_end\n from\n year_week_ends d\n join\n year_month_end m on d.fiscal_year_number = m.fiscal_year_number\n),\nfiscal_year_range as (\n\n select\n w.fiscal_year_number,\n cast(\n {{ dbt.dateadd('day', 1,\n 'lag(w.week_end_date) over(order by w.week_end_date)') }}\n as date) as fiscal_year_start_date,\n w.week_end_date as fiscal_year_end_date\n from\n weeks_at_month_end w\n where\n w.closest_to_month_end = 1\n\n),\nfiscal_year_dates as (\n\n select\n d.date_day,\n m.fiscal_year_number,\n m.fiscal_year_start_date,\n m.fiscal_year_end_date,\n w.week_start_date,\n w.week_end_date,\n -- we reset the weeks of the year starting with the merch year start date\n dense_rank()\n over(\n partition by m.fiscal_year_number\n order by w.week_start_date\n ) as fiscal_week_of_year\n from\n fsc_date_dimension d\n join\n fiscal_year_range m on d.date_day between m.fiscal_year_start_date and m.fiscal_year_end_date\n join\n weeks w on d.date_day between w.week_start_date and w.week_end_date\n\n)\nselect * from fiscal_year_dates order by 1\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt.dateadd", "macro.dbt.datediff"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6859965, + "supported_languages": null + }, + "macro.dbt_date.get_powers_of_two": { + "name": "get_powers_of_two", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/_utils/generate_series.sql", + "original_file_path": "macros/_utils/generate_series.sql", + "unique_id": "macro.dbt_date.get_powers_of_two", + "macro_sql": "{% macro get_powers_of_two(upper_bound) %}\n {{ return(adapter.dispatch('get_powers_of_two', 'dbt_date')(upper_bound)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__get_powers_of_two"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6870835, + "supported_languages": null + }, + "macro.dbt_date.default__get_powers_of_two": { + "name": "default__get_powers_of_two", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/_utils/generate_series.sql", + "original_file_path": "macros/_utils/generate_series.sql", + "unique_id": "macro.dbt_date.default__get_powers_of_two", + "macro_sql": "{% macro default__get_powers_of_two(upper_bound) %}\n\n {% if upper_bound <= 0 %}\n {{ exceptions.raise_compiler_error(\"upper bound must be positive\") }}\n {% endif %}\n\n {% for _ in range(1, 100) %}\n {% if upper_bound <= 2 ** loop.index %}{{ return(loop.index) }}{% endif %}\n {% endfor %}\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6878717, + "supported_languages": null + }, + "macro.dbt_date.generate_series": { + "name": "generate_series", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/_utils/generate_series.sql", + "original_file_path": "macros/_utils/generate_series.sql", + "unique_id": 
"macro.dbt_date.generate_series", + "macro_sql": "{% macro generate_series(upper_bound) %}\n {{ return(adapter.dispatch('generate_series', 'dbt_date')(upper_bound)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__generate_series"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6881993, + "supported_languages": null + }, + "macro.dbt_date.default__generate_series": { + "name": "default__generate_series", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/_utils/generate_series.sql", + "original_file_path": "macros/_utils/generate_series.sql", + "unique_id": "macro.dbt_date.default__generate_series", + "macro_sql": "{% macro default__generate_series(upper_bound) %}\n\n {% set n = dbt_date.get_powers_of_two(upper_bound) %}\n\n with p as (\n select 0 as generated_number union all select 1\n ), unioned as (\n\n select\n\n {% for i in range(n) %}\n p{{i}}.generated_number * power(2, {{i}})\n {% if not loop.last %} + {% endif %}\n {% endfor %}\n + 1\n as generated_number\n\n from\n\n {% for i in range(n) %}\n p as p{{i}}\n {% if not loop.last %} cross join {% endif %}\n {% endfor %}\n\n )\n\n select *\n from unioned\n where generated_number <= {{upper_bound}}\n order by generated_number\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.get_powers_of_two"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6891308, + "supported_languages": null + }, + "macro.dbt_date.date": { + "name": "date", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/_utils/modules_datetime.sql", + "original_file_path": "macros/_utils/modules_datetime.sql", + "unique_id": "macro.dbt_date.date", + "macro_sql": "{% macro date(year, month, day) %}\n {{ return(modules.datetime.date(year, month, day)) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.689736, + "supported_languages": null + }, + "macro.dbt_date.datetime": { + "name": "datetime", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/_utils/modules_datetime.sql", + "original_file_path": "macros/_utils/modules_datetime.sql", + "unique_id": "macro.dbt_date.datetime", + "macro_sql": "{% macro datetime(year, month, day, hour=0, minute=0, second=0, microsecond=0, tz=None) %}\n {% set tz = tz if tz else var(\"dbt_date:time_zone\") %}\n {{ return(\n modules.datetime.datetime(\n year=year, month=month, day=day, hour=hour,\n minute=minute, second=second, microsecond=microsecond,\n tzinfo=modules.pytz.timezone(tz)\n )\n ) }}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6906106, + "supported_languages": null + }, + "macro.dbt_date.get_intervals_between": { + "name": "get_intervals_between", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/_utils/date_spine.sql", + "original_file_path": "macros/_utils/date_spine.sql", + "unique_id": "macro.dbt_date.get_intervals_between", + "macro_sql": "{% macro get_intervals_between(start_date, end_date, datepart) -%}\n {{ 
return(adapter.dispatch('get_intervals_between', 'dbt_date')(start_date, end_date, datepart)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__get_intervals_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6915917, + "supported_languages": null + }, + "macro.dbt_date.default__get_intervals_between": { + "name": "default__get_intervals_between", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/_utils/date_spine.sql", + "original_file_path": "macros/_utils/date_spine.sql", + "unique_id": "macro.dbt_date.default__get_intervals_between", + "macro_sql": "{% macro default__get_intervals_between(start_date, end_date, datepart) -%}\n {%- call statement('get_intervals_between', fetch_result=True) %}\n\n select {{ dbt.datediff(start_date, end_date, datepart) }}\n\n {%- endcall -%}\n\n {%- set value_list = load_result('get_intervals_between') -%}\n\n {%- if value_list and value_list['data'] -%}\n {%- set values = value_list['data'] | map(attribute=0) | list %}\n {{ return(values[0]) }}\n {%- else -%}\n {{ return(1) }}\n {%- endif -%}\n\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt.statement", "macro.dbt.datediff"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6925313, + "supported_languages": null + }, + "macro.dbt_date.date_spine": { + "name": "date_spine", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/_utils/date_spine.sql", + "original_file_path": "macros/_utils/date_spine.sql", + "unique_id": "macro.dbt_date.date_spine", + "macro_sql": "{% macro date_spine(datepart, start_date, end_date) %}\n {{ return(adapter.dispatch('date_spine', 'dbt_date')(datepart, start_date, end_date)) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_date.default__date_spine"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6929178, + "supported_languages": null + }, + "macro.dbt_date.default__date_spine": { + "name": "default__date_spine", + "resource_type": "macro", + "package_name": "dbt_date", + "path": "macros/_utils/date_spine.sql", + "original_file_path": "macros/_utils/date_spine.sql", + "unique_id": "macro.dbt_date.default__date_spine", + "macro_sql": "{% macro default__date_spine(datepart, start_date, end_date) %}\n\n\n{# call as follows:\n\ndate_spine(\n \"day\",\n \"to_date('01/01/2016', 'mm/dd/yyyy')\",\n \"dbt.dateadd(week, 1, current_date)\"\n) #}\n\n\nwith rawdata as (\n\n {{\n dbt_date.generate_series(\n dbt_date.get_intervals_between(start_date, end_date, datepart)\n )\n }}\n\n),\n\nall_periods as (\n\n select (\n {{\n dbt.dateadd(\n datepart,\n \"(row_number() over (order by 1) - 1)\",\n start_date\n )\n }}\n ) as date_{{datepart}}\n from rawdata\n\n),\n\nfiltered as (\n\n select *\n from all_periods\n where date_{{datepart}} <= {{ end_date }}\n\n)\n\nselect * from filtered\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_date.generate_series", + "macro.dbt_date.get_intervals_between", + "macro.dbt.dateadd" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6934605, + "supported_languages": null + }, + 
"macro.dbt_expectations.type_timestamp": { + "name": "type_timestamp", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/datatypes.sql", + "original_file_path": "macros/utils/datatypes.sql", + "unique_id": "macro.dbt_expectations.type_timestamp", + "macro_sql": "\n{%- macro type_timestamp() -%}\n {{ return(adapter.dispatch('type_timestamp', 'dbt_expectations')()) }}\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_expectations.default__type_timestamp"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6940901, + "supported_languages": null + }, + "macro.dbt_expectations.default__type_timestamp": { + "name": "default__type_timestamp", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/datatypes.sql", + "original_file_path": "macros/utils/datatypes.sql", + "unique_id": "macro.dbt_expectations.default__type_timestamp", + "macro_sql": "{% macro default__type_timestamp() -%}\n timestamp\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6942463, + "supported_languages": null + }, + "macro.dbt_expectations.snowflake__type_timestamp": { + "name": "snowflake__type_timestamp", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/datatypes.sql", + "original_file_path": "macros/utils/datatypes.sql", + "unique_id": "macro.dbt_expectations.snowflake__type_timestamp", + "macro_sql": "{% macro snowflake__type_timestamp() -%}\n timestamp_ntz\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6943686, + "supported_languages": null + }, + "macro.dbt_expectations.postgres__type_timestamp": { + "name": "postgres__type_timestamp", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/datatypes.sql", + "original_file_path": "macros/utils/datatypes.sql", + "unique_id": "macro.dbt_expectations.postgres__type_timestamp", + "macro_sql": "{% macro postgres__type_timestamp() -%}\n timestamp without time zone\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6944852, + "supported_languages": null + }, + "macro.dbt_expectations.type_datetime": { + "name": "type_datetime", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/datatypes.sql", + "original_file_path": "macros/utils/datatypes.sql", + "unique_id": "macro.dbt_expectations.type_datetime", + "macro_sql": "{% macro type_datetime() -%}\n {{ return(adapter.dispatch('type_datetime', 'dbt_expectations')()) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.duckdb__type_datetime"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6947303, + "supported_languages": null + }, + "macro.dbt_expectations.default__type_datetime": { + "name": "default__type_datetime", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": 
"macros/utils/datatypes.sql", + "original_file_path": "macros/utils/datatypes.sql", + "unique_id": "macro.dbt_expectations.default__type_datetime", + "macro_sql": "{% macro default__type_datetime() -%}\n datetime\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6948469, + "supported_languages": null + }, + "macro.dbt_expectations.snowflake__type_datetime": { + "name": "snowflake__type_datetime", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/datatypes.sql", + "original_file_path": "macros/utils/datatypes.sql", + "unique_id": "macro.dbt_expectations.snowflake__type_datetime", + "macro_sql": "{% macro snowflake__type_datetime() -%}\n timestamp_ntz\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6949792, + "supported_languages": null + }, + "macro.dbt_expectations.postgres__type_datetime": { + "name": "postgres__type_datetime", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/datatypes.sql", + "original_file_path": "macros/utils/datatypes.sql", + "unique_id": "macro.dbt_expectations.postgres__type_datetime", + "macro_sql": "{% macro postgres__type_datetime() -%}\n timestamp without time zone\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6950943, + "supported_languages": null + }, + "macro.dbt_expectations.duckdb__type_datetime": { + "name": "duckdb__type_datetime", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/datatypes.sql", + "original_file_path": "macros/utils/datatypes.sql", + "unique_id": "macro.dbt_expectations.duckdb__type_datetime", + "macro_sql": "{% macro duckdb__type_datetime() -%}\n timestamp\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6953235, + "supported_languages": null + }, + "macro.dbt_expectations.spark__type_datetime": { + "name": "spark__type_datetime", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/datatypes.sql", + "original_file_path": "macros/utils/datatypes.sql", + "unique_id": "macro.dbt_expectations.spark__type_datetime", + "macro_sql": "{% macro spark__type_datetime() -%}\n timestamp\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6954389, + "supported_languages": null + }, + "macro.dbt_expectations.group_by": { + "name": "group_by", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/groupby.sql", + "original_file_path": "macros/utils/groupby.sql", + "unique_id": "macro.dbt_expectations.group_by", + "macro_sql": "{%- macro group_by(n) -%}\n {{ return(adapter.dispatch('group_by', 'dbt_expectations')(n)) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.default__group_by"] + }, + "description": "", + "meta": {}, + 
"docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.69585, + "supported_languages": null + }, + "macro.dbt_expectations.default__group_by": { + "name": "default__group_by", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/utils/groupby.sql", + "original_file_path": "macros/utils/groupby.sql", + "unique_id": "macro.dbt_expectations.default__group_by", + "macro_sql": "\n\n{%- macro default__group_by(n) -%}\n\n group by {% for i in range(1, n + 1) -%}\n {{ i }}{{ ',' if not loop.last }}\n {%- endfor -%}\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.696272, + "supported_languages": null + }, + "macro.dbt_expectations.test_expression_is_true": { + "name": "test_expression_is_true", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/expression_is_true.sql", + "original_file_path": "macros/schema_tests/_generalized/expression_is_true.sql", + "unique_id": "macro.dbt_expectations.test_expression_is_true", + "macro_sql": "{% test expression_is_true(model,\n expression,\n test_condition=\"= true\",\n group_by_columns=None,\n row_condition=None\n ) %}\n\n {{ dbt_expectations.expression_is_true(model, expression, test_condition, group_by_columns, row_condition) }}\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6973298, + "supported_languages": null + }, + "macro.dbt_expectations.expression_is_true": { + "name": "expression_is_true", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/expression_is_true.sql", + "original_file_path": "macros/schema_tests/_generalized/expression_is_true.sql", + "unique_id": "macro.dbt_expectations.expression_is_true", + "macro_sql": "{% macro expression_is_true(model,\n expression,\n test_condition=\"= true\",\n group_by_columns=None,\n row_condition=None\n ) %}\n {{ adapter.dispatch('expression_is_true', 'dbt_expectations') (model, expression, test_condition, group_by_columns, row_condition) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.default__expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6978314, + "supported_languages": null + }, + "macro.dbt_expectations.default__expression_is_true": { + "name": "default__expression_is_true", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/expression_is_true.sql", + "original_file_path": "macros/schema_tests/_generalized/expression_is_true.sql", + "unique_id": "macro.dbt_expectations.default__expression_is_true", + "macro_sql": "{% macro default__expression_is_true(model, expression, test_condition, group_by_columns, row_condition) -%}\nwith grouped_expression as (\n select\n {% if group_by_columns %}\n {% for group_by_column in group_by_columns -%}\n {{ group_by_column }} as col_{{ loop.index }},\n {% endfor -%}\n {% endif %}\n {{ dbt_expectations.truth_expression(expression) }}\n from {{ model }}\n {%- if 
row_condition %}\n where\n {{ row_condition }}\n {% endif %}\n {% if group_by_columns %}\n group by\n {% for group_by_column in group_by_columns -%}\n {{ group_by_column }}{% if not loop.last %},{% endif %}\n {% endfor %}\n {% endif %}\n\n),\nvalidation_errors as (\n\n select\n *\n from\n grouped_expression\n where\n not(expression {{ test_condition }})\n\n)\n\nselect *\nfrom validation_errors\n\n\n{% endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_expectations.truth_expression"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.698793, + "supported_languages": null + }, + "macro.dbt_expectations.truth_expression": { + "name": "truth_expression", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/_truth_expression.sql", + "original_file_path": "macros/schema_tests/_generalized/_truth_expression.sql", + "unique_id": "macro.dbt_expectations.truth_expression", + "macro_sql": "{% macro truth_expression(expression) %}\n {{ adapter.dispatch('truth_expression', 'dbt_expectations') (expression) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.default__truth_expression"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6991432, + "supported_languages": null + }, + "macro.dbt_expectations.default__truth_expression": { + "name": "default__truth_expression", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/_truth_expression.sql", + "original_file_path": "macros/schema_tests/_generalized/_truth_expression.sql", + "unique_id": "macro.dbt_expectations.default__truth_expression", + "macro_sql": "{% macro default__truth_expression(expression) %}\n {{ expression }} as expression\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.6993015, + "supported_languages": null + }, + "macro.dbt_expectations.test_expression_between": { + "name": "test_expression_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/expression_between.sql", + "original_file_path": "macros/schema_tests/_generalized/expression_between.sql", + "unique_id": "macro.dbt_expectations.test_expression_between", + "macro_sql": "{% test expression_between(model,\n expression,\n min_value=None,\n max_value=None,\n group_by_columns=None,\n row_condition=None,\n strictly=False\n ) %}\n\n {{ dbt_expectations.expression_between(model, expression, min_value, max_value, group_by_columns, row_condition, strictly) }}\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7005556, + "supported_languages": null + }, + "macro.dbt_expectations.expression_between": { + "name": "expression_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/expression_between.sql", + "original_file_path": "macros/schema_tests/_generalized/expression_between.sql", + "unique_id": 
"macro.dbt_expectations.expression_between", + "macro_sql": "{% macro expression_between(model,\n expression,\n min_value,\n max_value,\n group_by_columns,\n row_condition,\n strictly\n ) %}\n\n{%- if min_value is none and max_value is none -%}\n{{ exceptions.raise_compiler_error(\n \"You have to provide either a min_value, max_value or both.\"\n) }}\n{%- endif -%}\n\n{%- set strict_operator = \"\" if strictly else \"=\" -%}\n\n{% set expression_min_max %}\n( 1=1\n{%- if min_value is not none %} and {{ expression | trim }} >{{ strict_operator }} {{ min_value }}{% endif %}\n{%- if max_value is not none %} and {{ expression | trim }} <{{ strict_operator }} {{ max_value }}{% endif %}\n)\n{% endset %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression_min_max,\n group_by_columns=group_by_columns,\n row_condition=row_condition)\n }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7016885, + "supported_languages": null + }, + "macro.dbt_expectations.get_select": { + "name": "get_select", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/equal_expression.sql", + "original_file_path": "macros/schema_tests/_generalized/equal_expression.sql", + "unique_id": "macro.dbt_expectations.get_select", + "macro_sql": "{% macro get_select(model, expression, row_condition, group_by) -%}\n {{ adapter.dispatch('get_select', 'dbt_expectations') (model, expression, row_condition, group_by) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.default__get_select"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7066824, + "supported_languages": null + }, + "macro.dbt_expectations.default__get_select": { + "name": "default__get_select", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/equal_expression.sql", + "original_file_path": "macros/schema_tests/_generalized/equal_expression.sql", + "unique_id": "macro.dbt_expectations.default__get_select", + "macro_sql": "\n\n{%- macro default__get_select(model, expression, row_condition, group_by) %}\n select\n {% if group_by %}\n {% for g in group_by -%}\n {{ g }} as col_{{ loop.index }},\n {% endfor -%}\n {% endif %}\n {{ expression }} as expression\n from\n {{ model }}\n {%- if row_condition %}\n where\n {{ row_condition }}\n {% endif %}\n {% if group_by %}\n group by\n {% for g in group_by -%}\n {{ loop.index }}{% if not loop.last %},{% endif %}\n {% endfor %}\n {% endif %}\n{% endmacro -%}\n\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7076185, + "supported_languages": null + }, + "macro.dbt_expectations.test_equal_expression": { + "name": "test_equal_expression", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/equal_expression.sql", + "original_file_path": "macros/schema_tests/_generalized/equal_expression.sql", + "unique_id": "macro.dbt_expectations.test_equal_expression", + "macro_sql": "{% test equal_expression(model, expression,\n compare_model=None,\n 
compare_expression=None,\n group_by=None,\n compare_group_by=None,\n row_condition=None,\n compare_row_condition=None,\n tolerance=0.0,\n tolerance_percent=None\n ) -%}\n\n {{ adapter.dispatch('test_equal_expression', 'dbt_expectations') (\n model,\n expression,\n compare_model,\n compare_expression,\n group_by,\n compare_group_by,\n row_condition,\n compare_row_condition,\n tolerance,\n tolerance_percent) }}\n{%- endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.default__test_equal_expression"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.708383, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_equal_expression": { + "name": "default__test_equal_expression", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/equal_expression.sql", + "original_file_path": "macros/schema_tests/_generalized/equal_expression.sql", + "unique_id": "macro.dbt_expectations.default__test_equal_expression", + "macro_sql": "\n\n{%- macro default__test_equal_expression(\n model,\n expression,\n compare_model,\n compare_expression,\n group_by,\n compare_group_by,\n row_condition,\n compare_row_condition,\n tolerance,\n tolerance_percent) -%}\n\n {%- set compare_model = model if not compare_model else compare_model -%}\n {%- set compare_expression = expression if not compare_expression else compare_expression -%}\n {%- set compare_row_condition = row_condition if not compare_row_condition else compare_row_condition -%}\n {%- set compare_group_by = group_by if not compare_group_by else compare_group_by -%}\n\n {%- set n_cols = (group_by|length) if group_by else 0 %}\n with a as (\n {{ dbt_expectations.get_select(model, expression, row_condition, group_by) }}\n ),\n b as (\n {{ dbt_expectations.get_select(compare_model, compare_expression, compare_row_condition, compare_group_by) }}\n ),\n final as (\n\n select\n {% for i in range(1, n_cols + 1) -%}\n coalesce(a.col_{{ i }}, b.col_{{ i }}) as col_{{ i }},\n {% endfor %}\n a.expression,\n b.expression as compare_expression,\n abs(coalesce(a.expression, 0) - coalesce(b.expression, 0)) as expression_difference,\n abs(coalesce(a.expression, 0) - coalesce(b.expression, 0))/\n nullif(a.expression * 1.0, 0) as expression_difference_percent\n from\n {% if n_cols > 0 %}\n a\n full outer join\n b on\n {% for i in range(1, n_cols + 1) -%}\n a.col_{{ i }} = b.col_{{ i }} {% if not loop.last %}and{% endif %}\n {% endfor -%}\n {% else %}\n a cross join b\n {% endif %}\n )\n -- DEBUG:\n -- select * from final\n select\n *\n from final\n where\n {% if tolerance_percent %}\n expression_difference_percent > {{ tolerance_percent }}\n {% else %}\n expression_difference > {{ tolerance }}\n {% endif %}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_expectations.get_select"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7103994, + "supported_languages": null + }, + "macro.dbt_expectations.ignore_row_if_expression": { + "name": "ignore_row_if_expression", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/_ignore_row_if_expression.sql", + "original_file_path": "macros/schema_tests/_generalized/_ignore_row_if_expression.sql", + "unique_id": "macro.dbt_expectations.ignore_row_if_expression", + 
"macro_sql": "{% macro ignore_row_if_expression(ignore_row_if, columns) %}\n {{ adapter.dispatch('ignore_row_if_expression', 'dbt_expectations') (ignore_row_if, columns) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.default__ignore_row_if_expression"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.711029, + "supported_languages": null + }, + "macro.dbt_expectations.default__ignore_row_if_expression": { + "name": "default__ignore_row_if_expression", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/_generalized/_ignore_row_if_expression.sql", + "original_file_path": "macros/schema_tests/_generalized/_ignore_row_if_expression.sql", + "unique_id": "macro.dbt_expectations.default__ignore_row_if_expression", + "macro_sql": "{% macro default__ignore_row_if_expression(ignore_row_if, columns) %}\n {%- set ignore_row_if_values = [\"all_values_are_missing\", \"any_value_is_missing\"] -%}\n {% if ignore_row_if not in ignore_row_if_values %}\n {{ exceptions.raise_compiler_error(\n \"`ignore_row_if` must be one of \" ~ (ignore_row_if_values | join(\", \")) ~ \". Got: '\" ~ ignore_row_if ~\"'.'\"\n ) }}\n {% endif %}\n\n {%- set op = \"and\" if ignore_row_if == \"all_values_are_missing\" else \"or\" -%}\n not (\n {% for column in columns -%}\n {{ column }} is null{% if not loop.last %} {{ op }} {% endif %}\n {% endfor %}\n )\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.712086, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_value_lengths_to_be_between": { + "name": "test_expect_column_value_lengths_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/expect_column_value_lengths_to_be_between.sql", + "original_file_path": "macros/schema_tests/string_matching/expect_column_value_lengths_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_value_lengths_to_be_between", + "macro_sql": "{% test expect_column_value_lengths_to_be_between(model, column_name,\n min_value=None,\n max_value=None,\n row_condition=None,\n strictly=False\n ) %}\n{% set expression %}\n{{ dbt.length(column_name) }}\n{% endset %}\n\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=None,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt.length", + "macro.dbt_expectations.expression_between" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7130628, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_not_match_regex_list": { + "name": "test_expect_column_values_to_not_match_regex_list", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/expect_column_values_to_not_match_regex_list.sql", + "original_file_path": "macros/schema_tests/string_matching/expect_column_values_to_not_match_regex_list.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_not_match_regex_list", + 
"macro_sql": "{% test expect_column_values_to_not_match_regex_list(model, column_name,\n regex_list,\n match_on=\"any\",\n row_condition=None,\n is_raw=False,\n flags=\"\"\n ) %}\n\n{% set expression %}\n{% for regex in regex_list %}\n{{ dbt_expectations.regexp_instr(column_name, regex, is_raw=is_raw, flags=flags) }} = 0\n{%- if not loop.last %}\n{{ \" and \" if match_on == \"all\" else \" or \"}}\n{% endif -%}\n{% endfor %}\n{% endset %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.regexp_instr", + "macro.dbt_expectations.expression_is_true" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.714433, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_not_match_like_pattern": { + "name": "test_expect_column_values_to_not_match_like_pattern", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/expect_column_values_to_not_match_like_pattern.sql", + "original_file_path": "macros/schema_tests/string_matching/expect_column_values_to_not_match_like_pattern.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_not_match_like_pattern", + "macro_sql": "{% test expect_column_values_to_not_match_like_pattern(model, column_name,\n like_pattern,\n row_condition=None\n ) %}\n\n{% set expression = dbt_expectations._get_like_pattern_expression(column_name, like_pattern, positive=False) %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations._get_like_pattern_expression", + "macro.dbt_expectations.expression_is_true" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7151265, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_not_match_regex": { + "name": "test_expect_column_values_to_not_match_regex", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/expect_column_values_to_not_match_regex.sql", + "original_file_path": "macros/schema_tests/string_matching/expect_column_values_to_not_match_regex.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_not_match_regex", + "macro_sql": "{% test expect_column_values_to_not_match_regex(model, column_name,\n regex,\n row_condition=None,\n is_raw=False,\n flags=\"\"\n ) %}\n\n{% set expression %}\n{{ dbt_expectations.regexp_instr(column_name, regex, is_raw=is_raw, flags=flags) }} = 0\n{% endset %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.regexp_instr", + "macro.dbt_expectations.expression_is_true" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7160501, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_match_like_pattern_list": { + "name": 
"test_expect_column_values_to_match_like_pattern_list", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/expect_column_values_to_match_like_pattern_list.sql", + "original_file_path": "macros/schema_tests/string_matching/expect_column_values_to_match_like_pattern_list.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_match_like_pattern_list", + "macro_sql": "{% test expect_column_values_to_match_like_pattern_list(model, column_name,\n like_pattern_list,\n match_on=\"any\",\n row_condition=None\n ) %}\n\n{% set expression %}\n {% for like_pattern in like_pattern_list %}\n {{ dbt_expectations._get_like_pattern_expression(column_name, like_pattern, positive=True) }}\n {%- if not loop.last %}\n {{ \" and \" if match_on == \"all\" else \" or \"}}\n {% endif -%}\n {% endfor %}\n{% endset %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations._get_like_pattern_expression", + "macro.dbt_expectations.expression_is_true" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7173173, + "supported_languages": null + }, + "macro.dbt_expectations._get_like_pattern_expression": { + "name": "_get_like_pattern_expression", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/_get_like_pattern_expression.sql", + "original_file_path": "macros/schema_tests/string_matching/_get_like_pattern_expression.sql", + "unique_id": "macro.dbt_expectations._get_like_pattern_expression", + "macro_sql": "{% macro _get_like_pattern_expression(column_name, like_pattern, positive) %}\n{{ column_name }} {{ \"not\" if not positive else \"\" }} like '{{ like_pattern }}'\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7177145, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_match_like_pattern": { + "name": "test_expect_column_values_to_match_like_pattern", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/expect_column_values_to_match_like_pattern.sql", + "original_file_path": "macros/schema_tests/string_matching/expect_column_values_to_match_like_pattern.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_match_like_pattern", + "macro_sql": "{% test expect_column_values_to_match_like_pattern(model, column_name,\n like_pattern,\n row_condition=None\n ) %}\n\n{% set expression = dbt_expectations._get_like_pattern_expression(column_name, like_pattern, positive=True) %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations._get_like_pattern_expression", + "macro.dbt_expectations.expression_is_true" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7183738, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_match_regex": { 
+ "name": "test_expect_column_values_to_match_regex", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/expect_column_values_to_match_regex.sql", + "original_file_path": "macros/schema_tests/string_matching/expect_column_values_to_match_regex.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_match_regex", + "macro_sql": "{% test expect_column_values_to_match_regex(model, column_name,\n regex,\n row_condition=None,\n is_raw=False,\n flags=\"\"\n ) %}\n\n{% set expression %}\n{{ dbt_expectations.regexp_instr(column_name, regex, is_raw=is_raw, flags=flags) }} > 0\n{% endset %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.regexp_instr", + "macro.dbt_expectations.expression_is_true" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7192714, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_match_regex_list": { + "name": "test_expect_column_values_to_match_regex_list", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/expect_column_values_to_match_regex_list.sql", + "original_file_path": "macros/schema_tests/string_matching/expect_column_values_to_match_regex_list.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_match_regex_list", + "macro_sql": "{% test expect_column_values_to_match_regex_list(model, column_name,\n regex_list,\n match_on=\"any\",\n row_condition=None,\n is_raw=False,\n flags=\"\"\n ) %}\n\n{% set expression %}\n {% for regex in regex_list %}\n {{ dbt_expectations.regexp_instr(column_name, regex, is_raw=is_raw, flags=flags) }} > 0\n {%- if not loop.last %}\n {{ \" and \" if match_on == \"all\" else \" or \"}}\n {% endif -%}\n {% endfor %}\n{% endset %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.regexp_instr", + "macro.dbt_expectations.expression_is_true" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7207668, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_not_match_like_pattern_list": { + "name": "test_expect_column_values_to_not_match_like_pattern_list", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/expect_column_values_to_not_match_like_pattern_list.sql", + "original_file_path": "macros/schema_tests/string_matching/expect_column_values_to_not_match_like_pattern_list.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_not_match_like_pattern_list", + "macro_sql": "{% test expect_column_values_to_not_match_like_pattern_list(model, column_name,\n like_pattern_list,\n match_on=\"any\",\n row_condition=None\n ) %}\n\n{% set expression %}\n {% for like_pattern in like_pattern_list %}\n {{ dbt_expectations._get_like_pattern_expression(column_name, like_pattern, positive=False) }}\n {%- if not loop.last %}\n {{ \" and \" if match_on == \"all\" else \" or \"}}\n {% endif -%}\n {% 
endfor %}\n{% endset %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations._get_like_pattern_expression", + "macro.dbt_expectations.expression_is_true" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7220798, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_value_lengths_to_equal": { + "name": "test_expect_column_value_lengths_to_equal", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/string_matching/expect_column_value_lengths_to_equal.sql", + "original_file_path": "macros/schema_tests/string_matching/expect_column_value_lengths_to_equal.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_value_lengths_to_equal", + "macro_sql": "{% test expect_column_value_lengths_to_equal(model, column_name,\n value,\n row_condition=None\n ) %}\n\n{% set expression = dbt.length(column_name) ~ \" = \" ~ value %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt.length", + "macro.dbt_expectations.expression_is_true" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7227771, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_not_be_null": { + "name": "test_expect_column_values_to_not_be_null", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_not_be_null.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_not_be_null.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_not_be_null", + "macro_sql": "{% test expect_column_values_to_not_be_null(model, column_name, row_condition=None) %}\n\n{% set expression = column_name ~ \" is not null\" %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7233398, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_be_unique": { + "name": "test_expect_column_values_to_be_unique", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_unique.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_unique.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_be_unique", + "macro_sql": "{% test expect_column_values_to_be_unique(model, column_name, row_condition=None) %}\n{{ dbt_expectations.test_expect_compound_columns_to_be_unique(model, [column_name], row_condition=row_condition) }}\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.test_expect_compound_columns_to_be_unique" + ] + }, + "description": "", + 
"meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7236896, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_be_null": { + "name": "test_expect_column_values_to_be_null", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_null.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_null.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_be_null", + "macro_sql": "{% test expect_column_values_to_be_null(model, column_name, row_condition=None) %}\n\n{% set expression = column_name ~ \" is null\" %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7243192, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_be_in_set": { + "name": "test_expect_column_values_to_be_in_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_in_set.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_in_set.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_be_in_set", + "macro_sql": "{% test expect_column_values_to_be_in_set(model, column_name,\n value_set,\n quote_values=True,\n row_condition=None\n ) %}\n\nwith all_values as (\n\n select\n {{ column_name }} as value_field\n\n from {{ model }}\n {% if row_condition %}\n where {{ row_condition }}\n {% endif %}\n\n),\nset_values as (\n\n {% for value in value_set -%}\n select\n {% if quote_values -%}\n cast('{{ value }}' as {{ dbt.type_string() }})\n {%- else -%}\n {{ value }}\n {%- endif %} as value_field\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n),\nvalidation_errors as (\n -- values from the model that are not in the set\n select\n v.value_field\n from\n all_values v\n left join\n set_values s on v.value_field = s.value_field\n where\n s.value_field is null\n\n)\n\nselect *\nfrom validation_errors\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt.type_string"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7256062, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_be_increasing": { + "name": "test_expect_column_values_to_be_increasing", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_increasing.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_increasing.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_be_increasing", + "macro_sql": "{% test expect_column_values_to_be_increasing(model, column_name,\n sort_column=None,\n strictly=True,\n row_condition=None,\n group_by=None) %}\n\n{%- set sort_column = column_name if not sort_column else sort_column -%}\n{%- set operator = \">\" if strictly else \">=\" 
-%}\nwith all_values as (\n\n select\n {{ sort_column }} as sort_column,\n {%- if group_by -%}\n {{ group_by | join(\", \") }},\n {%- endif %}\n {{ column_name }} as value_field\n from {{ model }}\n {% if row_condition %}\n where {{ row_condition }}\n {% endif %}\n\n),\nadd_lag_values as (\n\n select\n sort_column,\n {%- if group_by -%}\n {{ group_by | join(\", \") }},\n {%- endif %}\n value_field,\n lag(value_field) over\n {%- if not group_by -%}\n (order by sort_column)\n {%- else -%}\n (partition by {{ group_by | join(\", \") }} order by sort_column)\n {%- endif %} as value_field_lag\n from\n all_values\n\n),\nvalidation_errors as (\n select\n *\n from\n add_lag_values\n where\n value_field_lag is not null\n and\n not (value_field {{ operator }} value_field_lag)\n\n)\nselect *\nfrom validation_errors\n{% endtest %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7275124, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_be_of_type": { + "name": "test_expect_column_values_to_be_of_type", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_of_type.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_of_type.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_be_of_type", + "macro_sql": "{%- test expect_column_values_to_be_of_type(model, column_name, column_type) -%}\n{{ dbt_expectations.test_expect_column_values_to_be_in_type_list(model, column_name, [column_type]) }}\n{%- endtest -%}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.test_expect_column_values_to_be_in_type_list" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.727856, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_have_consistent_casing": { + "name": "test_expect_column_values_to_have_consistent_casing", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_have_consistent_casing.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_have_consistent_casing.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_have_consistent_casing", + "macro_sql": "{% test expect_column_values_to_have_consistent_casing(model, column_name, display_inconsistent_columns=False) %}\n\nwith test_data as (\n\n select\n distinct {{ column_name }} as distinct_values\n from\n {{ model }}\n\n ),\n {% if display_inconsistent_columns %}\n validation_errors as (\n\n select\n lower(distinct_values) as inconsistent_columns,\n count(distinct_values) as set_count_case_insensitive\n from\n test_data\n group by 1\n having\n count(distinct_values) > 1\n\n )\n select * from validation_errors\n {% else %}\n validation_errors as (\n\n select\n count(1) as set_count,\n count(distinct lower(distinct_values)) as set_count_case_insensitive\n from\n test_data\n\n )\n select *\n from\n validation_errors\n where\n set_count != set_count_case_insensitive\n {% endif %}\n {%- endtest -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + 
"patch_path": null, + "arguments": [], + "created_at": 1711458072.7283857, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_be_in_type_list": { + "name": "test_expect_column_values_to_be_in_type_list", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_in_type_list.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_in_type_list.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_be_in_type_list", + "macro_sql": "{%- test expect_column_values_to_be_in_type_list(model, column_name, column_type_list) -%}\n{%- if execute -%}\n\n {%- set column_name = column_name | upper -%}\n {%- set columns_in_relation = adapter.get_columns_in_relation(model) -%}\n {%- set column_type_list = column_type_list| map(\"upper\") | list -%}\n with relation_columns as (\n\n {% for column in columns_in_relation %}\n select\n cast('{{ escape_single_quotes(column.name | upper) }}' as {{ dbt.type_string() }}) as relation_column,\n cast('{{ column.dtype | upper }}' as {{ dbt.type_string() }}) as relation_column_type\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n ),\n test_data as (\n\n select\n *\n from\n relation_columns\n where\n relation_column = '{{ column_name }}'\n and\n relation_column_type not in ('{{ column_type_list | join(\"', '\") }}')\n\n )\n select *\n from test_data\n\n{%- endif -%}\n{%- endtest -%}", + "depends_on": { + "macros": ["macro.dbt.escape_single_quotes", "macro.dbt.type_string"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7298653, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_be_between": { + "name": "test_expect_column_values_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_between.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_be_between", + "macro_sql": "{% test expect_column_values_to_be_between(model, column_name,\n min_value=None,\n max_value=None,\n row_condition=None,\n strictly=False\n ) %}\n\n{% set expression %}\n{{ column_name }}\n{% endset %}\n\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=None,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7307932, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_be_decreasing": { + "name": "test_expect_column_values_to_be_decreasing", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_decreasing.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_be_decreasing.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_be_decreasing", + "macro_sql": "{% test 
expect_column_values_to_be_decreasing(model, column_name,\n sort_column=None,\n strictly=True,\n row_condition=None,\n group_by=None) %}\n\n{%- set sort_column = column_name if not sort_column else sort_column -%}\n{%- set operator = \"<\" if strictly else \"<=\" %}\nwith all_values as (\n\n select\n {{ sort_column }} as sort_column,\n {%- if group_by -%}\n {{ group_by | join(\", \") }},\n {%- endif %}\n {{ column_name }} as value_field\n from {{ model }}\n {% if row_condition %}\n where {{ row_condition }}\n {% endif %}\n\n),\nadd_lag_values as (\n\n select\n sort_column,\n value_field,\n lag(value_field) over\n {%- if not group_by -%}\n (order by sort_column)\n {%- else -%}\n (partition by {{ group_by | join(\", \") }} order by sort_column)\n {%- endif %} as value_field_lag\n from\n all_values\n\n),\nvalidation_errors as (\n\n select\n *\n from\n add_lag_values\n where\n value_field_lag is not null\n and\n not (value_field {{ operator }} value_field_lag)\n\n)\nselect *\nfrom validation_errors\n{% endtest %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.732285, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_not_be_in_set": { + "name": "test_expect_column_values_to_not_be_in_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/column_values_basic/expect_column_values_to_not_be_in_set.sql", + "original_file_path": "macros/schema_tests/column_values_basic/expect_column_values_to_not_be_in_set.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_values_to_not_be_in_set", + "macro_sql": "{% test expect_column_values_to_not_be_in_set(model, column_name,\n value_set,\n quote_values=True,\n row_condition=None\n ) %}\n\nwith all_values as (\n\n select\n {{ column_name }} as value_field\n\n from {{ model }}\n {% if row_condition %}\n where {{ row_condition }}\n {% endif %}\n\n),\nset_values as (\n\n {% for value in value_set -%}\n select\n {% if quote_values -%}\n cast('{{ value }}' as {{ dbt.type_string() }})\n {%- else -%}\n {{ value }}\n {%- endif %} as value_field\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n),\nvalidation_errors as (\n -- values from the model that match the set\n select\n v.value_field\n from\n all_values v\n join\n set_values s on v.value_field = s.value_field\n\n)\n\nselect *\nfrom validation_errors\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt.type_string"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7334695, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_select_column_values_to_be_unique_within_record": { + "name": "test_expect_select_column_values_to_be_unique_within_record", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql", + "original_file_path": "macros/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql", + "unique_id": "macro.dbt_expectations.test_expect_select_column_values_to_be_unique_within_record", + "macro_sql": "{% test expect_select_column_values_to_be_unique_within_record(model,\n column_list,\n quote_columns=False,\n ignore_row_if=\"all_values_are_missing\",\n row_condition=None\n ) 
-%}\n {{ adapter.dispatch('test_expect_select_column_values_to_be_unique_within_record', 'dbt_expectations') (model, column_list, quote_columns, ignore_row_if, row_condition) }}\n{%- endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_select_column_values_to_be_unique_within_record" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7351718, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_select_column_values_to_be_unique_within_record": { + "name": "default__test_expect_select_column_values_to_be_unique_within_record", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql", + "original_file_path": "macros/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_select_column_values_to_be_unique_within_record", + "macro_sql": "{% macro default__test_expect_select_column_values_to_be_unique_within_record(model,\n column_list,\n quote_columns,\n ignore_row_if,\n row_condition\n ) %}\n\n{% if not quote_columns %}\n {%- set columns=column_list %}\n{% elif quote_columns %}\n {%- set columns=[] %}\n {% for column in column_list -%}\n {% set columns = columns.append( adapter.quote(column) ) %}\n {%- endfor %}\n{% else %}\n {{ exceptions.raise_compiler_error(\n \"`quote_columns` argument for unique_combination_of_columns test must be one of [True, False] Got: '\" ~ quote_columns ~\"'.'\"\n ) }}\n{% endif %}\n\n{%- set row_condition_ext -%}\n\n {%- if row_condition %}\n {{ row_condition }} and\n {% endif -%}\n\n {{ dbt_expectations.ignore_row_if_expression(ignore_row_if, columns) }}\n\n{%- endset -%}\n\nwith column_values as (\n\n select\n row_number() over(order by 1) as row_index,\n {% for column in columns -%}\n {{ column }}{% if not loop.last %},{% endif %}\n {%- endfor %}\n from {{ model }}\n where\n 1=1\n {%- if row_condition_ext %}\n and {{ row_condition_ext }}\n {% endif %}\n\n),\nunpivot_columns as (\n\n {% for column in columns %}\n select row_index, '{{ column }}' as column_name, {{ column }} as column_value from column_values\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n),\nvalidation_errors as (\n\n select\n row_index,\n count(distinct column_value) as column_values\n from unpivot_columns\n group by 1\n having count(distinct column_value) < {{ columns | length }}\n\n)\nselect * from validation_errors\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.ignore_row_if_expression"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7369163, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_pair_values_A_to_be_greater_than_B": { + "name": "test_expect_column_pair_values_A_to_be_greater_than_B", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/multi-column/expect_column_pair_values_A_to_be_greater_than_B.sql", + "original_file_path": "macros/schema_tests/multi-column/expect_column_pair_values_A_to_be_greater_than_B.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_pair_values_A_to_be_greater_than_B", + "macro_sql": "{% test 
expect_column_pair_values_A_to_be_greater_than_B(model,\n column_A,\n column_B,\n or_equal=False,\n row_condition=None\n ) %}\n\n{% set operator = \">=\" if or_equal else \">\" %}\n{% set expression = column_A ~ \" \" ~ operator ~ \" \" ~ column_B %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7378955, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_compound_columns_to_be_unique": { + "name": "test_expect_compound_columns_to_be_unique", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/multi-column/expect_compound_columns_to_be_unique.sql", + "original_file_path": "macros/schema_tests/multi-column/expect_compound_columns_to_be_unique.sql", + "unique_id": "macro.dbt_expectations.test_expect_compound_columns_to_be_unique", + "macro_sql": "{% test expect_compound_columns_to_be_unique(model,\n column_list,\n quote_columns=False,\n ignore_row_if=\"all_values_are_missing\",\n row_condition=None\n ) %}\n{% if not column_list %}\n {{ exceptions.raise_compiler_error(\n \"`column_list` must be specified as a list of columns. Got: '\" ~ column_list ~\"'.'\"\n ) }}\n{% endif %}\n\n{% if not quote_columns %}\n {%- set columns=column_list %}\n{% elif quote_columns %}\n {%- set columns=[] %}\n {% for column in column_list -%}\n {% set columns = columns.append( adapter.quote(column) ) %}\n {%- endfor %}\n{% else %}\n {{ exceptions.raise_compiler_error(\n \"`quote_columns` argument for expect_compound_columns_to_be_unique test must be one of [True, False] Got: '\" ~ quote_columns ~\"'.'\"\n ) }}\n{% endif %}\n\n{%- set row_condition_ext -%}\n\n {%- if row_condition %}\n {{ row_condition }} and\n {% endif -%}\n\n {{ dbt_expectations.ignore_row_if_expression(ignore_row_if, columns) }}\n\n{%- endset -%}\n\nwith validation_errors as (\n\n select\n {% for column in columns -%}\n {{ column }},\n {%- endfor %}\n count(*) as {{adapter.quote(\"n_records\")}}\n from {{ model }}\n where\n 1=1\n {%- if row_condition_ext %}\n and {{ row_condition_ext }}\n {% endif %}\n group by\n {% for column in columns -%}\n {{ column }}{% if not loop.last %},{% endif %}\n {%- endfor %}\n having count(*) > 1\n\n)\nselect * from validation_errors\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.ignore_row_if_expression"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7406085, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_multicolumn_sum_to_equal": { + "name": "test_expect_multicolumn_sum_to_equal", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/multi-column/expect_multicolumn_sum_to_equal.sql", + "original_file_path": "macros/schema_tests/multi-column/expect_multicolumn_sum_to_equal.sql", + "unique_id": "macro.dbt_expectations.test_expect_multicolumn_sum_to_equal", + "macro_sql": "{% test expect_multicolumn_sum_to_equal(model,\n column_list,\n sum_total,\n group_by=None,\n row_condition=None\n ) %}\n\n{% set expression %}\n{% for column in column_list %}\nsum({{ column }}){% if not loop.last %} + {% endif 
%}\n{% endfor %} = {{ sum_total }}\n{% endset %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=group_by,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7417772, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_pair_values_to_be_equal": { + "name": "test_expect_column_pair_values_to_be_equal", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/multi-column/expect_column_pair_values_to_be_equal.sql", + "original_file_path": "macros/schema_tests/multi-column/expect_column_pair_values_to_be_equal.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_pair_values_to_be_equal", + "macro_sql": "{% test expect_column_pair_values_to_be_equal(model,\n column_A,\n column_B,\n row_condition=None\n ) %}\n\n{% set operator = \"=\" %}\n{% set expression = column_A ~ \" \" ~ operator ~ \" \" ~ column_B %}\n\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7425523, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_pair_values_to_be_in_set": { + "name": "test_expect_column_pair_values_to_be_in_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/multi-column/expect_column_pair_values_to_be_in_set.sql", + "original_file_path": "macros/schema_tests/multi-column/expect_column_pair_values_to_be_in_set.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_pair_values_to_be_in_set", + "macro_sql": "{% test expect_column_pair_values_to_be_in_set(model,\n column_A,\n column_B,\n value_pairs_set,\n row_condition=None\n ) %}\n\n{% set expression %}\n{% for pair in value_pairs_set %}\n{%- if (pair | length) == 2 %}\n({{ column_A }} = {{ pair[0] }} and {{ column_B }} = {{ pair[1] }}){% if not loop.last %} or {% endif %}\n{% else %}\n{{ exceptions.raise_compiler_error(\n \"`value_pairs_set` argument for expect_column_pair_values_to_be_in_set test cannot have more than 2 item per element.\n Got: '\" ~ pair ~ \"'.'\"\n ) }}\n{% endif %}\n{% endfor %}\n{% endset %}\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=None,\n row_condition=row_condition\n )\n }}\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.744287, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_be_within_n_stdevs": { + "name": "test_expect_column_values_to_be_within_n_stdevs", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/distributional/expect_column_values_to_be_within_n_stdevs.sql", + "original_file_path": "macros/schema_tests/distributional/expect_column_values_to_be_within_n_stdevs.sql", + "unique_id": 
"macro.dbt_expectations.test_expect_column_values_to_be_within_n_stdevs", + "macro_sql": "{% test expect_column_values_to_be_within_n_stdevs(model,\n column_name,\n group_by=None,\n sigma_threshold=3\n ) -%}\n {{\n adapter.dispatch('test_expect_column_values_to_be_within_n_stdevs', 'dbt_expectations') (\n model, column_name, group_by, sigma_threshold\n )\n }}\n{%- endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_column_values_to_be_within_n_stdevs" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7453265, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_column_values_to_be_within_n_stdevs": { + "name": "default__test_expect_column_values_to_be_within_n_stdevs", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/distributional/expect_column_values_to_be_within_n_stdevs.sql", + "original_file_path": "macros/schema_tests/distributional/expect_column_values_to_be_within_n_stdevs.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_column_values_to_be_within_n_stdevs", + "macro_sql": "{% macro default__test_expect_column_values_to_be_within_n_stdevs(model,\n column_name,\n group_by,\n sigma_threshold\n ) %}\n\nwith metric_values as (\n\n select\n {{ group_by | join(\",\") ~ \",\" if group_by }}\n sum({{ column_name }}) as {{ column_name }}\n from\n {{ model }}\n {% if group_by -%}\n {{ dbt_expectations.group_by(group_by | length) }}\n {%- endif %}\n\n),\nmetric_values_with_statistics as (\n\n select\n *,\n avg({{ column_name }}) over() as {{ column_name }}_average,\n stddev({{ column_name }}) over() as {{ column_name }}_stddev\n from\n metric_values\n\n),\nmetric_values_z_scores as (\n\n select\n *,\n ({{ column_name }} - {{ column_name }}_average)/\n nullif({{ column_name }}_stddev, 0) as {{ column_name }}_sigma\n from\n metric_values_with_statistics\n\n)\nselect\n *\nfrom\n metric_values_z_scores\nwhere\n abs({{ column_name }}_sigma) > {{ sigma_threshold }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.group_by"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7462013, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_row_values_to_have_data_for_every_n_datepart": { + "name": "test_expect_row_values_to_have_data_for_every_n_datepart", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/distributional/expect_row_values_to_have_data_for_every_n_datepart.sql", + "original_file_path": "macros/schema_tests/distributional/expect_row_values_to_have_data_for_every_n_datepart.sql", + "unique_id": "macro.dbt_expectations.test_expect_row_values_to_have_data_for_every_n_datepart", + "macro_sql": "{%- test expect_row_values_to_have_data_for_every_n_datepart(model,\n date_col,\n date_part=\"day\",\n interval=None,\n row_condition=None,\n exclusion_condition=None,\n test_start_date=None,\n test_end_date=None) -%}\n{% if not execute %}\n {{ return('') }}\n{% endif %}\n\n{% if not test_start_date or not test_end_date %}\n {% set sql %}\n\n select\n min(cast({{ date_col }} as date)) as start_{{ date_part }},\n max(cast({{ date_col }} as date)) as end_{{ date_part }}\n from {{ model }}\n {% if row_condition %}\n where {{ row_condition }}\n {% endif %}\n\n {% 
endset %}\n\n {%- set dr = run_query(sql) -%}\n\n {%- set db_start_date = dr.columns[0].values()[0] -%}\n {%- set db_end_date = dr.columns[1].values()[0] -%}\n\n {% if db_start_date is not string %}\n {%- set db_start_date = db_start_date.strftime('%Y-%m-%d') -%}\n {%- set db_end_date = db_end_date.strftime('%Y-%m-%d') -%}\n {% endif %}\n\n{% endif %}\n\n{% if not test_start_date %}\n{% set start_date = db_start_date %}\n{% else %}\n{% set start_date = test_start_date %}\n{% endif %}\n\n\n{% if not test_end_date %}\n{% set end_date = db_end_date %}\n{% else %}\n{% set end_date = test_end_date %}\n{% endif %}\n\nwith base_dates as (\n\n {{ dbt_date.get_base_dates(start_date=start_date, end_date=end_date, datepart=date_part) }}\n {% if interval %}\n {#\n Filter the date spine created above down to the interval granularity using a modulo operation.\n The number of date_parts after the start_date divided by the integer interval will produce no remainder for the desired intervals,\n e.g. for 2-day interval from a starting Jan 1, 2020:\n params: start_date = '2020-01-01', date_part = 'day', interval = 2\n date spine created above: [2020-01-01, 2020-01-02, 2020-01-03, 2020-01-04, 2020-01-05, ...]\n The first parameter to the `mod` function would be the number of days between the start_date and the spine date, i.e. [0, 1, 2, 3, 4 ...]\n The second parameter to the `mod` function would be the integer interval, i.e. 2\n This modulo operation produces the following remainders: [0, 1, 0, 1, 0, ...]\n Filtering the spine only where this remainder == 0 will return a spine with every other day as desired, i.e. [2020-01-01, 2020-01-03, 2020-01-05, ...]\n #}\n where mod(\n cast({{ dbt.datediff(\"'\" ~ start_date ~ \"'\", 'date_' ~ date_part, date_part) }} as {{ dbt.type_int() }}),\n cast({{interval}} as {{ dbt.type_int() }})\n ) = 0\n {% endif %}\n\n),\nmodel_data as (\n\n select\n {% if not interval %}\n\n cast({{ dbt.date_trunc(date_part, date_col) }} as {{ dbt_expectations.type_datetime() }}) as date_{{ date_part }},\n\n {% else %}\n {#\n Use a modulo operator to determine the number of intervals that a date_col is away from the interval-date spine\n and subtracts that amount to effectively slice each date_col record into its corresponding spine bucket,\n e.g. 
given a date_col of with records [2020-01-01, 2020-01-02, 2020-01-03, 2020-01-11, 2020-01-12]\n if we want to slice these dates into their 2-day buckets starting Jan 1, 2020 (start_date = '2020-01-01', date_part='day', interval=2),\n the modulo operation described above will produce these remainders: [0, 1, 0, 0, 1]\n subtracting that number of days from the observations will produce records [2020-01-01, 2020-01-01, 2020-01-03, 2020-01-11, 2020-01-11],\n all of which align with records from the interval-date spine\n #}\n {{ dbt.dateadd(\n date_part,\n \"mod(\n cast(\" ~ dbt.datediff(\"'\" ~ start_date ~ \"'\", date_col, date_part) ~ \" as \" ~ dbt.type_int() ~ \" ),\n cast(\" ~ interval ~ \" as \" ~ dbt.type_int() ~ \" )\n ) * (-1)\",\n \"cast( \" ~ dbt.date_trunc(date_part, date_col) ~ \" as \" ~ dbt_expectations.type_datetime() ~ \")\"\n )}} as date_{{ date_part }},\n\n {% endif %}\n\n count(*) as row_cnt\n from\n {{ model }} f\n {% if row_condition %}\n where {{ row_condition }}\n {% endif %}\n group by\n date_{{date_part}}\n\n),\n\nfinal as (\n\n select\n cast(d.date_{{ date_part }} as {{ dbt_expectations.type_datetime() }}) as date_{{ date_part }},\n case when f.date_{{ date_part }} is null then true else false end as is_missing,\n coalesce(f.row_cnt, 0) as row_cnt\n from\n base_dates d\n left join\n model_data f on cast(d.date_{{ date_part }} as {{ dbt_expectations.type_datetime() }}) = f.date_{{ date_part }}\n)\nselect\n *\nfrom final\nwhere row_cnt = 0\n{% if exclusion_condition %}\n and {{ exclusion_condition }}\n{% endif %}\n{%- endtest -%}", + "depends_on": { + "macros": [ + "macro.dbt.run_query", + "macro.dbt_date.get_base_dates", + "macro.dbt.datediff", + "macro.dbt.type_int", + "macro.dbt.date_trunc", + "macro.dbt_expectations.type_datetime", + "macro.dbt.dateadd" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7521818, + "supported_languages": null + }, + "macro.dbt_expectations._get_metric_expression": { + "name": "_get_metric_expression", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/distributional/expect_column_values_to_be_within_n_moving_stdevs.sql", + "original_file_path": "macros/schema_tests/distributional/expect_column_values_to_be_within_n_moving_stdevs.sql", + "unique_id": "macro.dbt_expectations._get_metric_expression", + "macro_sql": "{%- macro _get_metric_expression(metric_column, take_logs) -%}\n\n{%- if take_logs %}\n{%- set expr = \"nullif(\" ~ metric_column ~ \", 0)\" -%}\ncoalesce({{ dbt_expectations.log_natural(expr) }}, 0)\n{%- else -%}\ncoalesce({{ metric_column }}, 0)\n{%- endif %}\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": ["macro.dbt_expectations.log_natural"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.75657, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_values_to_be_within_n_moving_stdevs": { + "name": "test_expect_column_values_to_be_within_n_moving_stdevs", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/distributional/expect_column_values_to_be_within_n_moving_stdevs.sql", + "original_file_path": "macros/schema_tests/distributional/expect_column_values_to_be_within_n_moving_stdevs.sql", + "unique_id": 
"macro.dbt_expectations.test_expect_column_values_to_be_within_n_moving_stdevs", + "macro_sql": "{% test expect_column_values_to_be_within_n_moving_stdevs(model,\n column_name,\n date_column_name,\n group_by=None,\n period='day',\n lookback_periods=1,\n trend_periods=7,\n test_periods=14,\n sigma_threshold=3,\n sigma_threshold_upper=None,\n sigma_threshold_lower=None,\n take_diffs=true,\n take_logs=true\n ) -%}\n {{ adapter.dispatch('test_expect_column_values_to_be_within_n_moving_stdevs', 'dbt_expectations') (model,\n column_name,\n date_column_name,\n group_by,\n period,\n lookback_periods,\n trend_periods,\n test_periods,\n sigma_threshold,\n sigma_threshold_upper,\n sigma_threshold_lower,\n take_diffs,\n take_logs\n ) }}\n{%- endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_column_values_to_be_within_n_moving_stdevs" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7575474, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_column_values_to_be_within_n_moving_stdevs": { + "name": "default__test_expect_column_values_to_be_within_n_moving_stdevs", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/distributional/expect_column_values_to_be_within_n_moving_stdevs.sql", + "original_file_path": "macros/schema_tests/distributional/expect_column_values_to_be_within_n_moving_stdevs.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_column_values_to_be_within_n_moving_stdevs", + "macro_sql": "{% macro default__test_expect_column_values_to_be_within_n_moving_stdevs(model,\n column_name,\n date_column_name,\n group_by,\n period,\n lookback_periods,\n trend_periods,\n test_periods,\n sigma_threshold,\n sigma_threshold_upper,\n sigma_threshold_lower,\n take_diffs,\n take_logs\n ) %}\n\n{%- set sigma_threshold_upper = sigma_threshold_upper if sigma_threshold_upper else sigma_threshold -%}\n{%- set sigma_threshold_lower = sigma_threshold_lower if sigma_threshold_lower else -1 * sigma_threshold -%}\n{%- set partition_by = \"partition by \" ~ (group_by | join(\",\")) if group_by -%}\n{%- set group_by_length = (group_by | length ) if group_by else 0 -%}\n\nwith metric_values as (\n\n with grouped_metric_values as (\n\n select\n {{ dbt.date_trunc(period, date_column_name) }} as metric_period,\n {{ group_by | join(\",\") ~ \",\" if group_by }}\n sum({{ column_name }}) as agg_metric_value\n from\n {{ model }}\n {{ dbt_expectations.group_by(1 + group_by_length) }}\n\n )\n {%- if take_diffs %}\n , grouped_metric_values_with_priors as (\n\n select\n *,\n lag(agg_metric_value, {{ lookback_periods }}) over(\n {{ partition_by }}\n order by metric_period) as prior_agg_metric_value\n from\n grouped_metric_values d\n\n )\n select\n *,\n {{ dbt_expectations._get_metric_expression(\"agg_metric_value\", take_logs) }}\n -\n {{ dbt_expectations._get_metric_expression(\"prior_agg_metric_value\", take_logs) }}\n as metric_test_value\n from\n grouped_metric_values_with_priors d\n\n {%- else %}\n\n select\n *,\n {{ dbt_expectations._get_metric_expression(\"agg_metric_value\", take_logs) }}\n as metric_test_value\n from\n grouped_metric_values\n\n {%- endif %}\n\n),\nmetric_moving_calcs as (\n\n select\n *,\n avg(metric_test_value)\n over({{ partition_by }}\n order by metric_period rows\n between {{ trend_periods }} preceding and 1 preceding) as metric_test_rolling_average,\n 
stddev(metric_test_value)\n over({{ partition_by }}\n order by metric_period rows\n between {{ trend_periods }} preceding and 1 preceding) as metric_test_rolling_stddev\n from\n metric_values\n\n),\nmetric_sigma as (\n\n select\n *,\n (metric_test_value - metric_test_rolling_average) as metric_test_delta,\n (metric_test_value - metric_test_rolling_average)/\n nullif(metric_test_rolling_stddev, 0) as metric_test_sigma\n from\n metric_moving_calcs\n\n)\nselect\n *\nfrom\n metric_sigma\nwhere\n\n metric_period >= cast(\n {{ dbt.dateadd(period, -test_periods, dbt.date_trunc(period, dbt_date.now())) }}\n as {{ dbt_expectations.type_timestamp() }})\n and\n metric_period < {{ dbt.date_trunc(period, dbt_date.now()) }}\n and\n\n not (\n metric_test_sigma >= {{ sigma_threshold_lower }} and\n metric_test_sigma <= {{ sigma_threshold_upper }}\n )\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt.date_trunc", + "macro.dbt_expectations.group_by", + "macro.dbt_expectations._get_metric_expression", + "macro.dbt.dateadd", + "macro.dbt_date.now", + "macro.dbt_expectations.type_timestamp" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7599735, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_unique_value_count_to_be_between": { + "name": "test_expect_column_unique_value_count_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_unique_value_count_to_be_between.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_unique_value_count_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_unique_value_count_to_be_between", + "macro_sql": "{% test expect_column_unique_value_count_to_be_between(model, column_name,\n min_value=None,\n max_value=None,\n group_by=None,\n row_condition=None,\n strictly=False\n ) %}\n{% set expression %}\ncount(distinct {{ column_name }})\n{% endset %}\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=group_by,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.761075, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_quantile_values_to_be_between": { + "name": "test_expect_column_quantile_values_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_quantile_values_to_be_between.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_quantile_values_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_quantile_values_to_be_between", + "macro_sql": "{% test expect_column_quantile_values_to_be_between(model, column_name,\n quantile,\n min_value=None,\n max_value=None,\n group_by=None,\n row_condition=None,\n strictly=False\n ) %}\n\n{% set expression %}\n{{ dbt_expectations.percentile_cont(column_name, quantile) }}\n{% endset %}\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n 
group_by_columns=group_by,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.percentile_cont", + "macro.dbt_expectations.expression_between" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.762152, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_min_to_be_between": { + "name": "test_expect_column_min_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_min_to_be_between.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_min_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_min_to_be_between", + "macro_sql": "{% test expect_column_min_to_be_between(model, column_name,\n min_value=None,\n max_value=None,\n group_by=None,\n row_condition=None,\n strictly=False\n ) %}\n{% set expression %}\nmin({{ column_name }})\n{% endset %}\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=group_by,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7631173, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_most_common_value_to_be_in_set": { + "name": "test_expect_column_most_common_value_to_be_in_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_most_common_value_to_be_in_set", + "macro_sql": "{% test expect_column_most_common_value_to_be_in_set(model,\n column_name,\n value_set,\n top_n,\n quote_values=True,\n data_type=\"decimal\",\n row_condition=None\n ) -%}\n\n {{ adapter.dispatch('test_expect_column_most_common_value_to_be_in_set', 'dbt_expectations') (\n model, column_name, value_set, top_n, quote_values, data_type, row_condition\n ) }}\n\n{%- endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_column_most_common_value_to_be_in_set" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7649598, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_column_most_common_value_to_be_in_set": { + "name": "default__test_expect_column_most_common_value_to_be_in_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_column_most_common_value_to_be_in_set", + "macro_sql": "{% macro default__test_expect_column_most_common_value_to_be_in_set(model,\n column_name,\n value_set,\n top_n,\n 
quote_values,\n data_type,\n row_condition\n ) %}\n\nwith value_counts as (\n\n select\n {% if quote_values -%}\n {{ column_name }}\n {%- else -%}\n cast({{ column_name }} as {{ data_type }})\n {%- endif %} as value_field,\n count(*) as value_count\n\n from {{ model }}\n {% if row_condition %}\n where {{ row_condition }}\n {% endif %}\n\n group by {% if quote_values -%}\n {{ column_name }}\n {%- else -%}\n cast({{ column_name }} as {{ data_type }})\n {%- endif %}\n\n),\nvalue_counts_ranked as (\n\n select\n *,\n row_number() over(order by value_count desc) as value_count_rank\n from\n value_counts\n\n),\nvalue_count_top_n as (\n\n select\n value_field\n from\n value_counts_ranked\n where\n value_count_rank = {{ top_n }}\n\n),\nset_values as (\n\n {% for value in value_set -%}\n select\n {% if quote_values -%}\n '{{ value }}'\n {%- else -%}\n cast({{ value }} as {{ data_type }})\n {%- endif %} as value_field\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n\n),\nunique_set_values as (\n\n select distinct value_field\n from\n set_values\n\n),\nvalidation_errors as (\n -- values from the model that are not in the set\n select\n value_field\n from\n value_count_top_n\n where\n value_field not in (select value_field from unique_set_values)\n\n)\n\nselect *\nfrom validation_errors\n\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7662773, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_distinct_values_to_equal_set": { + "name": "test_expect_column_distinct_values_to_equal_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_distinct_values_to_equal_set.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_distinct_values_to_equal_set.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_distinct_values_to_equal_set", + "macro_sql": "{% test expect_column_distinct_values_to_equal_set(model, column_name,\n value_set,\n quote_values=True,\n row_condition=None\n ) %}\n\nwith all_values as (\n\n select distinct\n {{ column_name }} as column_value\n\n from {{ model }}\n {% if row_condition %}\n where {{ row_condition }}\n {% endif %}\n\n),\nset_values as (\n\n {% for value in value_set -%}\n select\n {% if quote_values -%}\n '{{ value }}'\n {%- else -%}\n {{ value }}\n {%- endif %} as value_field\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n\n),\nunique_set_values as (\n\n select distinct value_field\n from\n set_values\n\n),\nvalidation_errors as (\n\n select\n *\n from\n all_values v\n full outer join\n unique_set_values s on v.column_value = s.value_field\n where\n v.column_value is null or\n s.value_field is null\n\n)\n\nselect *\nfrom validation_errors\n\n{% endtest %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.767491, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_distinct_count_to_equal_other_table": { + "name": "test_expect_column_distinct_count_to_equal_other_table", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_distinct_count_to_equal_other_table.sql", + "original_file_path": 
"macros/schema_tests/aggregate_functions/expect_column_distinct_count_to_equal_other_table.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_distinct_count_to_equal_other_table", + "macro_sql": "{% test expect_column_distinct_count_to_equal_other_table(model,\n compare_model,\n column_name,\n compare_column_name,\n row_condition=None,\n compare_row_condition=None\n ) %}\n{%- set expression -%}\ncount(distinct {{ column_name }})\n{%- endset -%}\n{%- set compare_expression -%}\n{%- if compare_column_name -%}\ncount(distinct {{ compare_column_name }})\n{%- else -%}\n{{ expression }}\n{%- endif -%}\n{%- endset -%}\n{{ dbt_expectations.test_equal_expression(\n model,\n expression=expression,\n compare_model=compare_model,\n compare_expression=compare_expression,\n row_condition=row_condition,\n compare_row_condition=compare_row_condition\n) }}\n{%- endtest -%}", + "depends_on": { + "macros": ["macro.dbt_expectations.test_equal_expression"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7685921, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_distinct_count_to_equal": { + "name": "test_expect_column_distinct_count_to_equal", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_distinct_count_to_equal.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_distinct_count_to_equal.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_distinct_count_to_equal", + "macro_sql": "{% test expect_column_distinct_count_to_equal(model,\n column_name,\n value,\n group_by=None,\n row_condition=None\n ) %}\n{% set expression %}\ncount(distinct {{ column_name }}) = {{ value }}\n{% endset %}\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=group_by,\n row_condition=row_condition)\n }}\n{%- endtest -%}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7693746, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_stdev_to_be_between": { + "name": "test_expect_column_stdev_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_stdev_to_be_between", + "macro_sql": "{% test expect_column_stdev_to_be_between(model, column_name,\n min_value=None,\n max_value=None,\n group_by=None,\n row_condition=None,\n strictly=False\n ) -%}\n {{ adapter.dispatch('test_expect_column_stdev_to_be_between', 'dbt_expectations') (\n model, column_name,\n min_value,\n max_value,\n group_by,\n row_condition,\n strictly\n ) }}\n{%- endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_column_stdev_to_be_between" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7708547, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_column_stdev_to_be_between": { + 
"name": "default__test_expect_column_stdev_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_column_stdev_to_be_between", + "macro_sql": "{% macro default__test_expect_column_stdev_to_be_between(\n model, column_name,\n min_value,\n max_value,\n group_by,\n row_condition,\n strictly\n ) %}\n\n{% set expression %}\nstddev({{ column_name }})\n{% endset %}\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=group_by,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.771477, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_distinct_values_to_be_in_set": { + "name": "test_expect_column_distinct_values_to_be_in_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_distinct_values_to_be_in_set.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_distinct_values_to_be_in_set.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_distinct_values_to_be_in_set", + "macro_sql": "{% test expect_column_distinct_values_to_be_in_set(model,\n column_name,\n value_set,\n quote_values=True,\n row_condition=None\n ) %}\n\nwith all_values as (\n\n select distinct\n {{ column_name }} as value_field\n\n from {{ model }}\n {% if row_condition %}\n where {{ row_condition }}\n {% endif %}\n\n),\nset_values as (\n\n {% for value in value_set -%}\n select\n {% if quote_values -%}\n '{{ value }}'\n {%- else -%}\n {{ value }}\n {%- endif %} as value_field\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n\n),\nunique_set_values as (\n\n select distinct value_field\n from\n set_values\n\n),\nvalidation_errors as (\n -- values from the model that are not in the set\n select\n v.value_field\n from\n all_values v\n left join\n unique_set_values s on v.value_field = s.value_field\n where\n s.value_field is null\n\n)\n\nselect *\nfrom validation_errors\n\n{% endtest %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.772755, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_proportion_of_unique_values_to_be_between": { + "name": "test_expect_column_proportion_of_unique_values_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_proportion_of_unique_values_to_be_between", + "macro_sql": "{% test expect_column_proportion_of_unique_values_to_be_between(model, column_name,\n min_value=None,\n max_value=None,\n group_by=None,\n 
row_condition=None,\n strictly=False\n ) %}\n{% set expression %}\ncount(distinct {{ column_name }})*1.0/count({{ column_name }})\n{% endset %}\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=group_by,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7737887, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_distinct_values_to_contain_set": { + "name": "test_expect_column_distinct_values_to_contain_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_distinct_values_to_contain_set.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_distinct_values_to_contain_set.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_distinct_values_to_contain_set", + "macro_sql": "{% test expect_column_distinct_values_to_contain_set(model, column_name,\n value_set,\n quote_values=True,\n row_condition=None\n ) %}\n\nwith all_values as (\n\n select distinct\n {{ column_name }} as value_field\n\n from {{ model }}\n {% if row_condition %}\n where {{ row_condition }}\n {% endif %}\n\n),\nset_values as (\n\n {% for value in value_set -%}\n select\n {% if quote_values -%}\n '{{ value }}'\n {%- else -%}\n {{ value }}\n {%- endif %} as value_field\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n\n),\nunique_set_values as (\n\n select distinct value_field\n from\n set_values\n\n),\nvalidation_errors as (\n -- values in set that are not in the list of values from the model\n select\n s.value_field\n from\n unique_set_values s\n left join\n all_values v on s.value_field = v.value_field\n where\n v.value_field is null\n\n)\n\nselect *\nfrom validation_errors\n\n{% endtest %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7749662, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_max_to_be_between": { + "name": "test_expect_column_max_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_max_to_be_between.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_max_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_max_to_be_between", + "macro_sql": "{% test expect_column_max_to_be_between(model, column_name,\n min_value=None,\n max_value=None,\n group_by=None,\n row_condition=None,\n strictly=False\n ) %}\n{% set expression %}\nmax({{ column_name }})\n{% endset %}\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=group_by,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7758853, + "supported_languages": null + }, + 
"macro.dbt_expectations.test_expect_column_sum_to_be_between": { + "name": "test_expect_column_sum_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_sum_to_be_between.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_sum_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_sum_to_be_between", + "macro_sql": "{% test expect_column_sum_to_be_between(model, column_name,\n min_value=None,\n max_value=None,\n group_by=None,\n row_condition=None,\n strictly=False\n ) %}\n{% set expression %}\nsum({{ column_name }})\n{% endset %}\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=group_by,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7771971, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_mean_to_be_between": { + "name": "test_expect_column_mean_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_mean_to_be_between.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_mean_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_mean_to_be_between", + "macro_sql": "{% test expect_column_mean_to_be_between(model, column_name,\n min_value=None,\n max_value=None,\n group_by=None,\n row_condition=None,\n strictly=False\n ) %}\n{% set expression %}\navg({{ column_name }})\n{% endset %}\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=group_by,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n{% endtest %}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7782505, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_distinct_count_to_be_less_than": { + "name": "test_expect_column_distinct_count_to_be_less_than", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_distinct_count_to_be_less_than.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_distinct_count_to_be_less_than.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_distinct_count_to_be_less_than", + "macro_sql": "{% test expect_column_distinct_count_to_be_less_than(model,\n column_name,\n value,\n group_by=None,\n row_condition=None\n ) %}\n{% set expression %}\ncount(distinct {{ column_name }}) < {{ value }}\n{% endset %}\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=group_by,\n row_condition=row_condition)\n }}\n{%- endtest -%}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.778987, + 
"supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_median_to_be_between": { + "name": "test_expect_column_median_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_median_to_be_between.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_median_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_median_to_be_between", + "macro_sql": "{% test expect_column_median_to_be_between(model, column_name,\n min_value=None,\n max_value=None,\n group_by=None,\n row_condition=None,\n strictly=False\n ) %}\n\n{% set expression %}\n{{ dbt_expectations.median(column_name) }}\n{% endset %}\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=group_by,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.median", + "macro.dbt_expectations.expression_between" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7800586, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_distinct_count_to_be_greater_than": { + "name": "test_expect_column_distinct_count_to_be_greater_than", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/aggregate_functions/expect_column_distinct_count_to_be_greater_than.sql", + "original_file_path": "macros/schema_tests/aggregate_functions/expect_column_distinct_count_to_be_greater_than.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_distinct_count_to_be_greater_than", + "macro_sql": "{% test expect_column_distinct_count_to_be_greater_than(model,\n column_name,\n value,\n group_by=None,\n row_condition=None\n ) %}\n{% set expression %}\ncount(distinct {{ column_name }}) > {{ value }}\n{% endset %}\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=group_by,\n row_condition=row_condition)\n }}\n{%- endtest -%}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.780799, + "supported_languages": null + }, + "macro.dbt_expectations._list_intersect": { + "name": "_list_intersect", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/_list_intersect.sql", + "original_file_path": "macros/schema_tests/table_shape/_list_intersect.sql", + "unique_id": "macro.dbt_expectations._list_intersect", + "macro_sql": "{%- macro _list_intersect(list1, list2) -%}\n{%- set matching_items = [] -%}\n{%- for itm in list1 -%}\n {%- if itm in list2 -%}\n {%- do matching_items.append(itm) -%}\n {%- endif -%}\n{%- endfor -%}\n{%- do return(matching_items) -%}\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7814271, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_row_count_to_be_between": { + "name": "test_expect_table_row_count_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", 
+ "path": "macros/schema_tests/table_shape/expect_table_row_count_to_be_between.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_row_count_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_row_count_to_be_between", + "macro_sql": "{%- test expect_table_row_count_to_be_between(model,\n min_value=None,\n max_value=None,\n group_by=None,\n row_condition=None,\n strictly=False\n ) -%}\n {{ adapter.dispatch('test_expect_table_row_count_to_be_between',\n 'dbt_expectations') (model,\n min_value,\n max_value,\n group_by,\n row_condition,\n strictly\n ) }}\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_table_row_count_to_be_between" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.782434, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_table_row_count_to_be_between": { + "name": "default__test_expect_table_row_count_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_row_count_to_be_between.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_row_count_to_be_between.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_table_row_count_to_be_between", + "macro_sql": "\n\n{%- macro default__test_expect_table_row_count_to_be_between(model,\n min_value,\n max_value,\n group_by,\n row_condition,\n strictly\n ) -%}\n{% set expression %}\ncount(*)\n{% endset %}\n{{ dbt_expectations.expression_between(model,\n expression=expression,\n min_value=min_value,\n max_value=max_value,\n group_by_columns=group_by,\n row_condition=row_condition,\n strictly=strictly\n ) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_between"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7829704, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_grouped_row_values_to_have_recent_data": { + "name": "test_expect_grouped_row_values_to_have_recent_data", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql", + "unique_id": "macro.dbt_expectations.test_expect_grouped_row_values_to_have_recent_data", + "macro_sql": "{% test expect_grouped_row_values_to_have_recent_data(model,\n group_by,\n timestamp_column,\n datepart,\n interval,\n row_condition=None) %}\n\n {{ adapter.dispatch('test_expect_grouped_row_values_to_have_recent_data', 'dbt_expectations') (model,\n group_by,\n timestamp_column,\n datepart,\n interval,\n row_condition) }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_grouped_row_values_to_have_recent_data" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7852368, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_grouped_row_values_to_have_recent_data": { + "name": "default__test_expect_grouped_row_values_to_have_recent_data", + "resource_type": "macro", + "package_name": 
"dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_grouped_row_values_to_have_recent_data", + "macro_sql": "{% macro default__test_expect_grouped_row_values_to_have_recent_data(model,\n group_by,\n timestamp_column,\n datepart,\n interval,\n row_condition) %}\nwith latest_grouped_timestamps as (\n\n select\n {{ group_by | join(\",\") ~ \",\" if group_by }}\n max(1) as join_key,\n max(cast({{ timestamp_column }} as {{ dbt_expectations.type_timestamp() }})) as latest_timestamp_column\n from\n {{ model }}\n where\n -- to exclude erroneous future dates\n cast({{ timestamp_column }} as {{ dbt_expectations.type_timestamp() }}) <= {{ dbt_date.now() }}\n {% if row_condition %}\n and {{ row_condition }}\n {% endif %}\n\n {% if group_by -%}\n {{ dbt_expectations.group_by(group_by | length) }}\n {%- endif %}\n),\ntotal_row_counts as (\n\n select\n {{ group_by | join(\",\") ~ \",\" if group_by }}\n max(1) as join_key,\n count(*) as row_count\n from\n latest_grouped_timestamps\n {% if group_by -%}\n {{ dbt_expectations.group_by(group_by | length) }}\n {%- endif %}\n\n\n),\noutdated_grouped_timestamps as (\n\n select *\n from\n latest_grouped_timestamps\n where\n -- are the max timestamps per group older than the specified cutoff?\n latest_timestamp_column <\n cast(\n {{ dbt.dateadd(datepart, interval * -1, dbt_date.now()) }}\n as {{ dbt_expectations.type_timestamp() }}\n )\n\n),\nvalidation_errors as (\n\n select\n r.row_count,\n t.*\n from\n total_row_counts r\n left join\n outdated_grouped_timestamps t\n on\n {% for g in group_by %}\n r.{{ g }} = t.{{ g }} and\n {% endfor %}\n r.join_key = t.join_key\n where\n -- fail if either no rows were returned due to row_condition,\n -- or the recency test returned failed rows\n r.row_count = 0\n or\n t.join_key is not null\n\n)\nselect * from validation_errors\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.type_timestamp", + "macro.dbt_date.now", + "macro.dbt_expectations.group_by", + "macro.dbt.dateadd" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7868521, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_row_count_to_equal_other_table": { + "name": "test_expect_table_row_count_to_equal_other_table", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal_other_table.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal_other_table.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_row_count_to_equal_other_table", + "macro_sql": "{%- test expect_table_row_count_to_equal_other_table(model,\n compare_model,\n group_by=None,\n compare_group_by=None,\n factor=1,\n row_condition=None,\n compare_row_condition=None\n ) -%}\n\n {{ adapter.dispatch('test_expect_table_row_count_to_equal_other_table',\n 'dbt_expectations') (model,\n compare_model,\n group_by,\n compare_group_by,\n factor,\n row_condition,\n compare_row_condition\n ) }}\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_table_row_count_to_equal_other_table" + ] + }, + "description": "", + "meta": {}, + "docs": { + 
"show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7880123, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_table_row_count_to_equal_other_table": { + "name": "default__test_expect_table_row_count_to_equal_other_table", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal_other_table.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal_other_table.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_table_row_count_to_equal_other_table", + "macro_sql": "\n\n{%- macro default__test_expect_table_row_count_to_equal_other_table(model,\n compare_model,\n group_by,\n compare_group_by,\n factor,\n row_condition,\n compare_row_condition\n ) -%}\n{{ dbt_expectations.test_equal_expression(model, \"count(*)\",\n compare_model=compare_model,\n compare_expression=\"count(*) * \" + factor|string,\n group_by=group_by,\n compare_group_by=compare_group_by,\n row_condition=row_condition,\n compare_row_condition=compare_row_condition\n) }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_expectations.test_equal_expression"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7885807, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_row_count_to_equal": { + "name": "test_expect_table_row_count_to_equal", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_row_count_to_equal", + "macro_sql": "{%- test expect_table_row_count_to_equal(model,\n value,\n group_by=None,\n row_condition=None\n ) -%}\n {{ adapter.dispatch('test_expect_table_row_count_to_equal',\n 'dbt_expectations') (model,\n value,\n group_by,\n row_condition\n ) }}\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_table_row_count_to_equal" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7893302, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_table_row_count_to_equal": { + "name": "default__test_expect_table_row_count_to_equal", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_table_row_count_to_equal", + "macro_sql": "\n\n\n\n{%- macro default__test_expect_table_row_count_to_equal(model,\n value,\n group_by,\n row_condition\n ) -%}\n{% set expression %}\ncount(*) = {{ value }}\n{% endset %}\n{{ dbt_expectations.expression_is_true(model,\n expression=expression,\n group_by_columns=group_by,\n row_condition=row_condition)\n }}\n{%- endmacro -%}", + "depends_on": { + "macros": ["macro.dbt_expectations.expression_is_true"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 
1711458072.7897563, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_aggregation_to_equal_other_table": { + "name": "test_expect_table_aggregation_to_equal_other_table", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_aggregation_to_equal_other_table.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_aggregation_to_equal_other_table.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_aggregation_to_equal_other_table", + "macro_sql": "{%- test expect_table_aggregation_to_equal_other_table(model,\n expression,\n compare_model,\n compare_expression=None,\n group_by=None,\n compare_group_by=None,\n row_condition=None,\n compare_row_condition=None,\n tolerance=0.0,\n tolerance_percent=None\n ) -%}\n\n\n{{ dbt_expectations.test_equal_expression(\n model,\n expression=expression,\n compare_model=compare_model,\n compare_expression=compare_expression,\n group_by=group_by,\n compare_group_by=compare_group_by,\n row_condition=row_condition,\n compare_row_condition=compare_row_condition,\n tolerance=tolerance,\n tolerance_percent=tolerance_percent\n) }}\n\n{%- endtest -%}", + "depends_on": { + "macros": ["macro.dbt_expectations.test_equal_expression"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7906327, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_columns_to_match_ordered_list": { + "name": "test_expect_table_columns_to_match_ordered_list", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_columns_to_match_ordered_list.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_columns_to_match_ordered_list.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_columns_to_match_ordered_list", + "macro_sql": "{%- test expect_table_columns_to_match_ordered_list(model, column_list, transform=\"upper\") -%}\n{%- if execute -%}\n {%- set column_list = column_list | map(transform) | list -%}\n {%- set relation_column_names = dbt_expectations._get_column_list(model, transform) -%}\n {%- set matching_columns = dbt_expectations._list_intersect(column_list, relation_column_names) -%}\n with relation_columns as (\n\n {% for col_name in relation_column_names %}\n select\n {{ loop.index }} as relation_column_idx,\n cast('{{ col_name }}' as {{ dbt.type_string() }}) as relation_column\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n ),\n input_columns as (\n\n {% for col_name in column_list %}\n select\n {{ loop.index }} as input_column_idx,\n cast('{{ col_name }}' as {{ dbt.type_string() }}) as input_column\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n select *\n from\n relation_columns r\n full outer join\n input_columns i on r.relation_column = i.input_column and r.relation_column_idx = i.input_column_idx\n where\n -- catch any column in input list that is not in the sequence of table columns\n -- or any table column that is not in the input sequence\n r.relation_column is null or\n i.input_column is null\n\n{%- endif -%}\n{%- endtest -%}", + "depends_on": { + "macros": [ + "macro.dbt_expectations._get_column_list", + "macro.dbt_expectations._list_intersect", + "macro.dbt.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": 
null, + "arguments": [], + "created_at": 1711458072.7925346, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_columns_to_not_contain_set": { + "name": "test_expect_table_columns_to_not_contain_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_columns_to_not_contain_set.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_columns_to_not_contain_set.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_columns_to_not_contain_set", + "macro_sql": "{%- test expect_table_columns_to_not_contain_set(model, column_list, transform=\"upper\") -%}\n{%- if execute -%}\n {%- set column_list = column_list | map(transform) | list -%}\n {%- set relation_column_names = dbt_expectations._get_column_list(model, transform) -%}\n {%- set matching_columns = dbt_expectations._list_intersect(column_list, relation_column_names) -%}\n with relation_columns as (\n\n {% for col_name in relation_column_names %}\n select cast('{{ col_name }}' as {{ dbt.type_string() }}) as relation_column\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n ),\n input_columns as (\n\n {% for col_name in column_list %}\n select cast('{{ col_name }}' as {{ dbt.type_string() }}) as input_column\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n -- catch any column in input list that is in the list of table columns\n select *\n from\n input_columns i\n inner join\n relation_columns r on r.relation_column = i.input_column\n\n{%- endif -%}\n{%- endtest -%}", + "depends_on": { + "macros": [ + "macro.dbt_expectations._get_column_list", + "macro.dbt_expectations._list_intersect", + "macro.dbt.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7942815, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_column_count_to_equal_other_table": { + "name": "test_expect_table_column_count_to_equal_other_table", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_column_count_to_equal_other_table.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_column_count_to_equal_other_table.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_column_count_to_equal_other_table", + "macro_sql": "{%- test expect_table_column_count_to_equal_other_table(model, compare_model) -%}\n{%- if execute -%}\n{%- set number_columns = (adapter.get_columns_in_relation(model) | length) -%}\n{%- set compare_number_columns = (adapter.get_columns_in_relation(compare_model) | length) -%}\nwith test_data as (\n\n select\n {{ number_columns }} as number_columns,\n {{ compare_number_columns }} as compare_number_columns\n\n)\nselect *\nfrom test_data\nwhere\n number_columns != compare_number_columns\n{%- endif -%}\n{%- endtest -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7950845, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_column_count_to_equal": { + "name": "test_expect_table_column_count_to_equal", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_column_count_to_equal.sql", + "original_file_path": 
"macros/schema_tests/table_shape/expect_table_column_count_to_equal.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_column_count_to_equal", + "macro_sql": "{%- test expect_table_column_count_to_equal(model, value) -%}\n{%- if execute -%}\n{%- set number_actual_columns = (adapter.get_columns_in_relation(model) | length) -%}\nwith test_data as (\n\n select\n {{ number_actual_columns }} as number_actual_columns,\n {{ value }} as value\n\n)\nselect *\nfrom test_data\nwhere\n number_actual_columns != value\n{%- endif -%}\n{%- endtest -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7956245, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_row_values_to_have_recent_data": { + "name": "test_expect_row_values_to_have_recent_data", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_row_values_to_have_recent_data.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_row_values_to_have_recent_data.sql", + "unique_id": "macro.dbt_expectations.test_expect_row_values_to_have_recent_data", + "macro_sql": "{% test expect_row_values_to_have_recent_data(model,\n column_name,\n datepart,\n interval,\n row_condition=None) %}\n\n {{ adapter.dispatch('test_expect_row_values_to_have_recent_data', 'dbt_expectations') (model,\n column_name,\n datepart,\n interval,\n row_condition) }}\n\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_row_values_to_have_recent_data" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.7968044, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_row_values_to_have_recent_data": { + "name": "default__test_expect_row_values_to_have_recent_data", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_row_values_to_have_recent_data.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_row_values_to_have_recent_data.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_row_values_to_have_recent_data", + "macro_sql": "{% macro default__test_expect_row_values_to_have_recent_data(model, column_name, datepart, interval, row_condition) %}\n{%- set default_start_date = '1970-01-01' -%}\nwith max_recency as (\n\n select max(cast({{ column_name }} as {{ dbt_expectations.type_timestamp() }})) as max_timestamp\n from\n {{ model }}\n where\n -- to exclude erroneous future dates\n cast({{ column_name }} as {{ dbt_expectations.type_timestamp() }}) <= {{ dbt_date.now() }}\n {% if row_condition %}\n and {{ row_condition }}\n {% endif %}\n)\nselect\n *\nfrom\n max_recency\nwhere\n -- if the row_condition excludes all rows, we need to compare against a default date\n -- to avoid false negatives\n coalesce(max_timestamp, cast('{{ default_start_date }}' as {{ dbt_expectations.type_timestamp() }}))\n <\n cast({{ dbt.dateadd(datepart, interval * -1, dbt_date.now()) }} as {{ dbt_expectations.type_timestamp() }})\n\n{% endmacro %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.type_timestamp", + "macro.dbt_date.now", + "macro.dbt.dateadd" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, 
+ "arguments": [], + "created_at": 1711458072.7978475, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_row_count_to_equal_other_table_times_factor": { + "name": "test_expect_table_row_count_to_equal_other_table_times_factor", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal_other_table_times_factor.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal_other_table_times_factor.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_row_count_to_equal_other_table_times_factor", + "macro_sql": "{%- test expect_table_row_count_to_equal_other_table_times_factor(model,\n compare_model,\n factor,\n group_by=None,\n compare_group_by=None,\n row_condition=None,\n compare_row_condition=None\n ) -%}\n {{ adapter.dispatch('test_expect_table_row_count_to_equal_other_table_times_factor',\n 'dbt_expectations') (model,\n compare_model,\n factor,\n group_by,\n compare_group_by,\n row_condition,\n compare_row_condition\n ) }}\n{% endtest %}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.default__test_expect_table_row_count_to_equal_other_table_times_factor" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.799045, + "supported_languages": null + }, + "macro.dbt_expectations.default__test_expect_table_row_count_to_equal_other_table_times_factor": { + "name": "default__test_expect_table_row_count_to_equal_other_table_times_factor", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal_other_table_times_factor.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_row_count_to_equal_other_table_times_factor.sql", + "unique_id": "macro.dbt_expectations.default__test_expect_table_row_count_to_equal_other_table_times_factor", + "macro_sql": "\n\n{%- macro default__test_expect_table_row_count_to_equal_other_table_times_factor(model,\n compare_model,\n factor,\n group_by,\n compare_group_by,\n row_condition,\n compare_row_condition\n ) -%}\n\n{{ dbt_expectations.test_expect_table_row_count_to_equal_other_table(model,\n compare_model,\n group_by=group_by,\n compare_group_by=compare_group_by,\n factor=factor,\n row_condition=row_condition,\n compare_row_condition=compare_row_condition\n) }}\n{%- endmacro -%}", + "depends_on": { + "macros": [ + "macro.dbt_expectations.test_expect_table_row_count_to_equal_other_table" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.79953, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_column_count_to_be_between": { + "name": "test_expect_table_column_count_to_be_between", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_column_count_to_be_between.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_column_count_to_be_between.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_column_count_to_be_between", + "macro_sql": "{%- test expect_table_column_count_to_be_between(model,\n min_value=None,\n max_value=None\n ) -%}\n{%- if min_value is none and max_value is none -%}\n{{ exceptions.raise_compiler_error(\n \"You have to provide 
either a min_value, max_value or both.\"\n) }}\n{%- endif -%}\n{%- if execute -%}\n{%- set number_actual_columns = (adapter.get_columns_in_relation(model) | length) -%}\n\n{%- set expression %}\n( 1=1\n{%- if min_value %} and number_actual_columns >= min_value{% endif %}\n{%- if max_value %} and number_actual_columns <= max_value{% endif %}\n)\n{% endset -%}\n\nwith test_data as (\n\n select\n {{ number_actual_columns }} as number_actual_columns,\n {{ min_value if min_value else 0 }} as min_value,\n {{ max_value if max_value else 0 }} as max_value\n\n)\nselect *\nfrom test_data\nwhere\n not {{ expression }}\n{%- endif -%}\n{%- endtest -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8008666, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_columns_to_contain_set": { + "name": "test_expect_table_columns_to_contain_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_columns_to_contain_set.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_columns_to_contain_set.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_columns_to_contain_set", + "macro_sql": "{%- test expect_table_columns_to_contain_set(model, column_list, transform=\"upper\") -%}\n{%- if execute -%}\n {%- set column_list = column_list | map(transform) | list -%}\n {%- set relation_column_names = dbt_expectations._get_column_list(model, transform) -%}\n {%- set matching_columns = dbt_expectations._list_intersect(column_list, relation_column_names) -%}\n with relation_columns as (\n\n {% for col_name in relation_column_names %}\n select cast('{{ col_name }}' as {{ dbt.type_string() }}) as relation_column\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n ),\n input_columns as (\n\n {% for col_name in column_list %}\n select cast('{{ col_name }}' as {{ dbt.type_string() }}) as input_column\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n select *\n from\n input_columns i\n left join\n relation_columns r on r.relation_column = i.input_column\n where\n -- catch any column in input list that is not in the list of table columns\n r.relation_column is null\n{%- endif -%}\n{%- endtest -%}", + "depends_on": { + "macros": [ + "macro.dbt_expectations._get_column_list", + "macro.dbt_expectations._list_intersect", + "macro.dbt.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8024457, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_table_columns_to_match_set": { + "name": "test_expect_table_columns_to_match_set", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_table_columns_to_match_set.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_table_columns_to_match_set.sql", + "unique_id": "macro.dbt_expectations.test_expect_table_columns_to_match_set", + "macro_sql": "{%- test expect_table_columns_to_match_set(model, column_list, transform=\"upper\") -%}\n{%- if execute -%}\n {%- set column_list = column_list | map(transform) | list -%}\n {%- set relation_column_names = dbt_expectations._get_column_list(model, transform) -%}\n {%- set matching_columns = 
dbt_expectations._list_intersect(column_list, relation_column_names) -%}\n with relation_columns as (\n\n {% for col_name in relation_column_names %}\n select cast('{{ col_name }}' as {{ dbt.type_string() }}) as relation_column\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n ),\n input_columns as (\n\n {% for col_name in column_list %}\n select cast('{{ col_name }}' as {{ dbt.type_string() }}) as input_column\n {% if not loop.last %}union all{% endif %}\n {% endfor %}\n )\n select *\n from\n relation_columns r\n full outer join\n input_columns i on r.relation_column = i.input_column\n where\n -- catch any column in input list that is not in the list of table columns\n -- or any table column that is not in the input list\n r.relation_column is null or\n i.input_column is null\n\n{%- endif -%}\n{%- endtest -%}", + "depends_on": { + "macros": [ + "macro.dbt_expectations._get_column_list", + "macro.dbt_expectations._list_intersect", + "macro.dbt.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8041117, + "supported_languages": null + }, + "macro.dbt_expectations._get_column_list": { + "name": "_get_column_list", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/_get_column_list.sql", + "original_file_path": "macros/schema_tests/table_shape/_get_column_list.sql", + "unique_id": "macro.dbt_expectations._get_column_list", + "macro_sql": "{%- macro _get_column_list(model, transform=\"upper\") -%}\n{%- set relation_columns = adapter.get_columns_in_relation(model) -%}\n{%- set relation_column_names = relation_columns | map(attribute=\"name\") | map(transform) | list -%}\n{%- do return(relation_column_names) -%}\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8046744, + "supported_languages": null + }, + "macro.dbt_expectations.test_expect_column_to_exist": { + "name": "test_expect_column_to_exist", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/schema_tests/table_shape/expect_column_to_exist.sql", + "original_file_path": "macros/schema_tests/table_shape/expect_column_to_exist.sql", + "unique_id": "macro.dbt_expectations.test_expect_column_to_exist", + "macro_sql": "{%- test expect_column_to_exist(model, column_name, column_index=None, transform=\"upper\") -%}\n{%- if execute -%}\n\n {%- set column_name = column_name | map(transform) | join -%}\n {%- set relation_column_names = dbt_expectations._get_column_list(model, transform) -%}\n\n {%- set matching_column_index = relation_column_names.index(column_name) if column_name in relation_column_names else -1 %}\n\n {%- if column_index -%}\n\n {%- set column_index_0 = column_index - 1 if column_index > 0 else 0 -%}\n\n {%- set column_index_matches = true if matching_column_index == column_index_0 else false %}\n\n {%- else -%}\n\n {%- set column_index_matches = true -%}\n\n {%- endif %}\n\n with test_data as (\n\n select\n cast('{{ column_name }}' as {{ dbt.type_string() }}) as column_name,\n {{ matching_column_index }} as matching_column_index,\n {{ column_index_matches }} as column_index_matches\n\n )\n select *\n from test_data\n where\n not(matching_column_index >= 0 and column_index_matches)\n\n{%- endif -%}\n{%- endtest -%}", + "depends_on": { + 
"macros": [ + "macro.dbt_expectations._get_column_list", + "macro.dbt.type_string" + ] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8062599, + "supported_languages": null + }, + "macro.dbt_expectations.rand": { + "name": "rand", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/rand.sql", + "original_file_path": "macros/math/rand.sql", + "unique_id": "macro.dbt_expectations.rand", + "macro_sql": "{% macro rand() -%}\n {{ adapter.dispatch('rand', 'dbt_expectations') () }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.duckdb__rand"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8066964, + "supported_languages": null + }, + "macro.dbt_expectations.default__rand": { + "name": "default__rand", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/rand.sql", + "original_file_path": "macros/math/rand.sql", + "unique_id": "macro.dbt_expectations.default__rand", + "macro_sql": "{% macro default__rand() -%}\n\n rand()\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8068142, + "supported_languages": null + }, + "macro.dbt_expectations.bigquery__rand": { + "name": "bigquery__rand", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/rand.sql", + "original_file_path": "macros/math/rand.sql", + "unique_id": "macro.dbt_expectations.bigquery__rand", + "macro_sql": "{% macro bigquery__rand() -%}\n\n rand()\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8069274, + "supported_languages": null + }, + "macro.dbt_expectations.snowflake__rand": { + "name": "snowflake__rand", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/rand.sql", + "original_file_path": "macros/math/rand.sql", + "unique_id": "macro.dbt_expectations.snowflake__rand", + "macro_sql": "{% macro snowflake__rand(seed) -%}\n\n uniform(0::float, 1::float, random())\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8070545, + "supported_languages": null + }, + "macro.dbt_expectations.postgres__rand": { + "name": "postgres__rand", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/rand.sql", + "original_file_path": "macros/math/rand.sql", + "unique_id": "macro.dbt_expectations.postgres__rand", + "macro_sql": "{% macro postgres__rand() -%}\n\n random()\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8071673, + "supported_languages": null + }, + "macro.dbt_expectations.redshift__rand": { + "name": "redshift__rand", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/rand.sql", + 
"original_file_path": "macros/math/rand.sql", + "unique_id": "macro.dbt_expectations.redshift__rand", + "macro_sql": "{% macro redshift__rand() -%}\n\n random()\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.807295, + "supported_languages": null + }, + "macro.dbt_expectations.duckdb__rand": { + "name": "duckdb__rand", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/rand.sql", + "original_file_path": "macros/math/rand.sql", + "unique_id": "macro.dbt_expectations.duckdb__rand", + "macro_sql": "{% macro duckdb__rand() -%}\n\n random()\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8074465, + "supported_languages": null + }, + "macro.dbt_expectations.percentile_cont": { + "name": "percentile_cont", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/percentile_cont.sql", + "original_file_path": "macros/math/percentile_cont.sql", + "unique_id": "macro.dbt_expectations.percentile_cont", + "macro_sql": "{% macro percentile_cont(field, quantile, partition=None) %}\n {{ adapter.dispatch('quantile', 'dbt_expectations') (field, quantile, partition) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.default__quantile"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8082602, + "supported_languages": null + }, + "macro.dbt_expectations.default__quantile": { + "name": "default__quantile", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/percentile_cont.sql", + "original_file_path": "macros/math/percentile_cont.sql", + "unique_id": "macro.dbt_expectations.default__quantile", + "macro_sql": "{% macro default__quantile(field, quantile, partition) -%}\n percentile_cont({{ quantile }}) within group (order by {{ field }})\n {%- if partition %}over(partition by {{ partition }}){% endif -%}\n{%- endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8085697, + "supported_languages": null + }, + "macro.dbt_expectations.bigquery__quantile": { + "name": "bigquery__quantile", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/percentile_cont.sql", + "original_file_path": "macros/math/percentile_cont.sql", + "unique_id": "macro.dbt_expectations.bigquery__quantile", + "macro_sql": "{% macro bigquery__quantile(field, quantile, partition) -%}\n percentile_cont({{ field }}, {{ quantile }})\n over({%- if partition %}partition by {{ partition }}{% endif -%})\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8088758, + "supported_languages": null + }, + "macro.dbt_expectations.spark__quantile": { + "name": "spark__quantile", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/percentile_cont.sql", + "original_file_path": 
"macros/math/percentile_cont.sql", + "unique_id": "macro.dbt_expectations.spark__quantile", + "macro_sql": "{% macro spark__quantile(field, quantile, partition) -%}\n percentile({{ field }}, {{ quantile }})\n over({%- if partition %}partition by {{ partition }}{% endif -%})\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.809218, + "supported_languages": null + }, + "macro.dbt_expectations.median": { + "name": "median", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/median.sql", + "original_file_path": "macros/math/median.sql", + "unique_id": "macro.dbt_expectations.median", + "macro_sql": "{% macro median(field) %}\n{{ dbt_expectations.percentile_cont(field, 0.5) }}\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.percentile_cont"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8095133, + "supported_languages": null + }, + "macro.dbt_expectations.log_natural": { + "name": "log_natural", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/log_natural.sql", + "original_file_path": "macros/math/log_natural.sql", + "unique_id": "macro.dbt_expectations.log_natural", + "macro_sql": "{% macro log_natural(x) -%}\n {{ adapter.dispatch('log_natural', 'dbt_expectations') (x) }}\n{%- endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.default__log_natural"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8099434, + "supported_languages": null + }, + "macro.dbt_expectations.default__log_natural": { + "name": "default__log_natural", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/log_natural.sql", + "original_file_path": "macros/math/log_natural.sql", + "unique_id": "macro.dbt_expectations.default__log_natural", + "macro_sql": "{% macro default__log_natural(x) -%}\n\n ln({{ x }})\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8101017, + "supported_languages": null + }, + "macro.dbt_expectations.bigquery__log_natural": { + "name": "bigquery__log_natural", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/log_natural.sql", + "original_file_path": "macros/math/log_natural.sql", + "unique_id": "macro.dbt_expectations.bigquery__log_natural", + "macro_sql": "{% macro bigquery__log_natural(x) -%}\n\n ln({{ x }})\n\n{%- endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8102589, + "supported_languages": null + }, + "macro.dbt_expectations.snowflake__log_natural": { + "name": "snowflake__log_natural", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/log_natural.sql", + "original_file_path": "macros/math/log_natural.sql", + "unique_id": "macro.dbt_expectations.snowflake__log_natural", + "macro_sql": "{% macro snowflake__log_natural(x) -%}\n\n ln({{ x }})\n\n{%- 
endmacro -%}\n\n", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8104107, + "supported_languages": null + }, + "macro.dbt_expectations.spark__log_natural": { + "name": "spark__log_natural", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/math/log_natural.sql", + "original_file_path": "macros/math/log_natural.sql", + "unique_id": "macro.dbt_expectations.spark__log_natural", + "macro_sql": "{% macro spark__log_natural(x) -%}\n\n ln({{ x }})\n\n{%- endmacro -%}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8105614, + "supported_languages": null + }, + "macro.dbt_expectations.regexp_instr": { + "name": "regexp_instr", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/regex/regexp_instr.sql", + "original_file_path": "macros/regex/regexp_instr.sql", + "unique_id": "macro.dbt_expectations.regexp_instr", + "macro_sql": "{% macro regexp_instr(source_value, regexp, position=1, occurrence=1, is_raw=False, flags=\"\") %}\n\n {{ adapter.dispatch('regexp_instr', 'dbt_expectations')(\n source_value, regexp, position, occurrence, is_raw, flags\n ) }}\n\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations.duckdb__regexp_instr"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.812384, + "supported_languages": null + }, + "macro.dbt_expectations.default__regexp_instr": { + "name": "default__regexp_instr", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/regex/regexp_instr.sql", + "original_file_path": "macros/regex/regexp_instr.sql", + "unique_id": "macro.dbt_expectations.default__regexp_instr", + "macro_sql": "{% macro default__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %}\n{# unclear if other databases support raw strings or flags #}\n{% if is_raw or flags %}\n {{ exceptions.warn(\n \"is_raw and flags options are not supported for this adapter \"\n ~ \"and are being ignored.\"\n ) }}\n{% endif %}\nregexp_instr({{ source_value }}, '{{ regexp }}', {{ position }}, {{ occurrence }})\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8129337, + "supported_languages": null + }, + "macro.dbt_expectations.snowflake__regexp_instr": { + "name": "snowflake__regexp_instr", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/regex/regexp_instr.sql", + "original_file_path": "macros/regex/regexp_instr.sql", + "unique_id": "macro.dbt_expectations.snowflake__regexp_instr", + "macro_sql": "{% macro snowflake__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %}\n{%- set regexp = \"$$\" ~ regexp ~ \"$$\" if is_raw else \"'\" ~ regexp ~ \"'\" -%}\n{% if flags %}{{ dbt_expectations._validate_flags(flags, 'cimes') }}{% endif %}\nregexp_instr({{ source_value }}, {{ regexp }}, {{ position }}, {{ occurrence }}, 0, '{{ flags }}')\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations._validate_flags"] + }, + "description": "", + "meta": {}, + 
"docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8135989, + "supported_languages": null + }, + "macro.dbt_expectations.bigquery__regexp_instr": { + "name": "bigquery__regexp_instr", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/regex/regexp_instr.sql", + "original_file_path": "macros/regex/regexp_instr.sql", + "unique_id": "macro.dbt_expectations.bigquery__regexp_instr", + "macro_sql": "{% macro bigquery__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %}\n{% if flags %}\n {{ dbt_expectations._validate_re2_flags(flags) }}\n {# BigQuery prepends \"(?flags)\" to set flags for current group #}\n {%- set regexp = \"(?\" ~ flags ~ \")\" ~ regexp -%}\n{% endif %}\n{%- set regexp = \"r'\" ~ regexp ~ \"'\" if is_raw else \"'\" ~ regexp ~ \"'\" -%}\nregexp_instr({{ source_value }}, {{ regexp }}, {{ position }}, {{ occurrence }})\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations._validate_re2_flags"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8143444, + "supported_languages": null + }, + "macro.dbt_expectations.postgres__regexp_instr": { + "name": "postgres__regexp_instr", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/regex/regexp_instr.sql", + "original_file_path": "macros/regex/regexp_instr.sql", + "unique_id": "macro.dbt_expectations.postgres__regexp_instr", + "macro_sql": "{% macro postgres__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %}\n{% if flags %}{{ dbt_expectations._validate_flags(flags, 'bcegimnpqstwx') }}{% endif %}\ncoalesce(array_length((select regexp_matches({{ source_value }}, '{{ regexp }}', '{{ flags }}')), 1), 0)\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations._validate_flags"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8147984, + "supported_languages": null + }, + "macro.dbt_expectations.redshift__regexp_instr": { + "name": "redshift__regexp_instr", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/regex/regexp_instr.sql", + "original_file_path": "macros/regex/regexp_instr.sql", + "unique_id": "macro.dbt_expectations.redshift__regexp_instr", + "macro_sql": "{% macro redshift__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %}\n{% if flags %}{{ dbt_expectations._validate_flags(flags, 'ciep') }}{% endif %}\nregexp_instr({{ source_value }}, '{{ regexp }}', {{ position }}, {{ occurrence }}, 0, '{{ flags }}')\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations._validate_flags"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.815366, + "supported_languages": null + }, + "macro.dbt_expectations.duckdb__regexp_instr": { + "name": "duckdb__regexp_instr", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/regex/regexp_instr.sql", + "original_file_path": "macros/regex/regexp_instr.sql", + "unique_id": "macro.dbt_expectations.duckdb__regexp_instr", + "macro_sql": "{% macro duckdb__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %}\n{% if flags %}{{ 
dbt_expectations._validate_flags(flags, 'ciep') }}{% endif %}\nregexp_matches({{ source_value }}, '{{ regexp }}', '{{ flags }}')\n{% endmacro %}", + "depends_on": { + "macros": ["macro.dbt_expectations._validate_flags"] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8158097, + "supported_languages": null + }, + "macro.dbt_expectations.spark__regexp_instr": { + "name": "spark__regexp_instr", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/regex/regexp_instr.sql", + "original_file_path": "macros/regex/regexp_instr.sql", + "unique_id": "macro.dbt_expectations.spark__regexp_instr", + "macro_sql": "{% macro spark__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %}\n{% if is_raw or flags %}\n {{ exceptions.warn(\n \"is_raw and flags options are not supported for this adapter \"\n ~ \"and are being ignored.\"\n ) }}\n{% endif %}\nlength(regexp_extract({{ source_value }}, '{{ regexp }}', 0))\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8162866, + "supported_languages": null + }, + "macro.dbt_expectations._validate_flags": { + "name": "_validate_flags", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/regex/regexp_instr.sql", + "original_file_path": "macros/regex/regexp_instr.sql", + "unique_id": "macro.dbt_expectations._validate_flags", + "macro_sql": "{% macro _validate_flags(flags, alphabet) %}\n{% for flag in flags %}\n {% if flag not in alphabet %}\n {# Using raise_compiler_error causes disabled tests with invalid flags to fail compilation #}\n {{ exceptions.warn(\n \"flag \" ~ flag ~ \" not in list of allowed flags for this adapter: \" ~ alphabet | join(\", \")\n ) }}\n {% endif %}\n{% endfor %}\n{% endmacro %}", + "depends_on": { + "macros": [] + }, + "description": "", + "meta": {}, + "docs": { + "show": true, + "node_color": null + }, + "patch_path": null, + "arguments": [], + "created_at": 1711458072.8169308, + "supported_languages": null + }, + "macro.dbt_expectations._validate_re2_flags": { + "name": "_validate_re2_flags", + "resource_type": "macro", + "package_name": "dbt_expectations", + "path": "macros/regex/regexp_instr.sql", + "original_file_path": "macros/regex/regexp_instr.sql", + "unique_id": "macro.dbt_expectations._validate_re2_flags", + "macro_sql": "{% macro _validate_re2_flags(flags) %}\n{# Re2 supports following flags: #}\n{# i : case-insensitive (default fault) #}\n{# m : multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false) #}\n{# s : let . match \\n (default false) #}\n{# U : ungreedy: swap meaning of x* and x*?, x+ and x+?, etc (default false) #}\n{# Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). #}\n\n{# Regex explanation: do not allow consecutive dashes, accept all re2 flags and clear operator, do not end with a dash #}\n{% set re2_flags_pattern = '^(?!.*--)[-imsU]*(? 
1\n\n\n", + "relation_name": null + } + ], + "elapsed_time": 0.30997633934020996, + "args": { + "use_colors_file": true, + "select": [], + "use_colors": true, + "defer": false, + "partial_parse": true, + "profiles_dir": "/home/trymzet/.dbt", + "cache_selected_only": false, + "target": "dev", + "log_file_max_bytes": 10485760, + "favor_state": false, + "log_level": "info", + "log_path": "/home/trymzet/work/dyvenia/nesso-cli/my_nesso_project/logs", + "static_parser": true, + "partial_parse_file_diff": true, + "log_level_file": "debug", + "log_format": "default", + "version_check": true, + "vars": {}, + "enable_legacy_logger": false, + "quiet": false, + "indirect_selection": "eager", + "send_anonymous_usage_stats": true, + "printer_width": 80, + "print": true, + "which": "test", + "write_json": true, + "warn_error_options": { + "include": [], + "exclude": [] + }, + "macro_debugging": false, + "project_dir": "/home/trymzet/work/dyvenia/nesso-cli/my_nesso_project", + "populate_cache": true, + "strict_mode": false, + "invocation_command": "dbt test -t dev", + "show_resource_report": false, + "log_format_file": "debug", + "exclude": [], + "introspect": true + } +} diff --git a/tests/testfile.sqlite b/tests/testfile.sqlite deleted file mode 100644 index c17588a55fc18323df7ae99fd200df6278381f93..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8192 zcmeIuu?c`M5QX6vu~d{ykO@SvvvL3%5eKkPuntO#scpTha^^HTn`Ngfr zxzFA;DrJm1=Oo$C""" + ) + column_mapping = c4c.get_property_to_sap_label_dict( + url="https://example.com/metadata" + ) + assert column_mapping == {"key": "Label"} + + +@mock.patch("viadot.sources.cloud_for_customers.requests.get") +def test_get_response(mocked_requests_get, c4c): + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"d": {"results": [{"key": "value"}]}} + mocked_requests_get.return_value = mock_response + c4c.get_response = mocked_requests_get + + response = c4c.get_response(url="https://example.com/service.svc/Entity") + assert response.ok + assert response.json() == {"d": {"results": [{"key": "value"}]}} diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index a974bd030..ca07cf405 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -2,26 +2,25 @@ from pathlib import Path import pytest - from viadot.config import Config, get_source_config, get_source_credentials + FAKE_SOURCE_CONFIG = {"fake_source": {"credentials": {"api_key": "test"}}} -@pytest.fixture(scope="function") +@pytest.fixture() def TEST_CONFIG_PATH(): """Creates and deletes a test config file for each test. Yields: config_path: The path to the test config file. """ - # Make sure we always create it from scratch. config_path = Path("config.yml") config_path.unlink(missing_ok=True) test_config = {"sources": [FAKE_SOURCE_CONFIG]} - with open(config_path, "w") as f: + with Path(config_path).open("w") as f: json.dump(test_config, f) yield config_path @@ -30,20 +29,19 @@ def TEST_CONFIG_PATH(): config_path.unlink() -@pytest.fixture(scope="function") +@pytest.fixture() def TEST_CONFIG_PATH_JSON(): """Creates and deletes a test config file for each test. Yields: config_path: The path to the test config file. """ - # Make sure we always create it from scratch. 
config_path = Path("config.json") config_path.unlink(missing_ok=True) test_config = {"sources": [FAKE_SOURCE_CONFIG]} - with open(config_path, "w") as f: + with Path(config_path).open("w") as f: json.dump(test_config, f) yield config_path diff --git a/tests/unit/test_duckdb.py b/tests/unit/test_duckdb.py new file mode 100644 index 000000000..48677faff --- /dev/null +++ b/tests/unit/test_duckdb.py @@ -0,0 +1,127 @@ +from pathlib import Path + +from duckdb import BinderException +import pandas as pd +import pytest +from viadot.sources.duckdb import DuckDB + + +TABLE = "test_table" +SCHEMA = "test_schema" +TABLE_MULTIPLE_PARQUETS = "test_multiple_parquets" +DATABASE_PATH = "test_db_123.duckdb" + + +@pytest.fixture(scope="module") +def duckdb(): + duckdb = DuckDB(credentials={"database": DATABASE_PATH, "read_only": False}) + yield duckdb + Path(DATABASE_PATH).unlink() + + +def test_create_table_from_parquet(duckdb, TEST_PARQUET_FILE_PATH): + duckdb.create_table_from_parquet( + schema=SCHEMA, table=TABLE, path=TEST_PARQUET_FILE_PATH + ) + df = duckdb.to_df(f"SELECT * FROM {SCHEMA}.{TABLE}") # noqa: S608 + assert df.shape[0] == 3 + duckdb.drop_table(TABLE, schema=SCHEMA) + duckdb.run_query(f"DROP SCHEMA {SCHEMA}") + + +def test_create_table_from_parquet_append(duckdb, TEST_PARQUET_FILE_PATH): + duckdb.create_table_from_parquet( + schema=SCHEMA, table=TABLE, path=TEST_PARQUET_FILE_PATH + ) + df = duckdb.to_df(f"SELECT * FROM {SCHEMA}.{TABLE}") # noqa: S608 + assert df.shape[0] == 3 + + # now append + duckdb.create_table_from_parquet( + schema=SCHEMA, table=TABLE, path=TEST_PARQUET_FILE_PATH, if_exists="append" + ) + df = duckdb.to_df(f"SELECT * FROM {SCHEMA}.{TABLE}") # noqa: S608 + assert df.shape[0] == 6 + + duckdb.drop_table(TABLE, schema=SCHEMA) + duckdb.run_query(f"DROP SCHEMA {SCHEMA}") + + +def test_create_table_from_parquet_delete(duckdb, TEST_PARQUET_FILE_PATH): + duckdb.create_table_from_parquet( + schema=SCHEMA, table=TABLE, path=TEST_PARQUET_FILE_PATH + ) + df = duckdb.to_df(f"SELECT * FROM {SCHEMA}.{TABLE}") # noqa: S608 + assert df.shape[0] == 3 + + df = pd.DataFrame.from_dict( + data={ + "country": ["italy", "germany", "spain"], + "sales": [100, 50, 80], + "color": ["red", "blue", "grren"], + } + ) + + df.to_parquet("test_parquet.parquet") + with pytest.raises(BinderException): + duckdb.create_table_from_parquet( + schema=SCHEMA, table=TABLE, path="test_parquet.parquet", if_exists="delete" + ) + + duckdb.drop_table(TABLE, schema=SCHEMA) + duckdb.run_query(f"DROP SCHEMA {SCHEMA}") + Path("test_parquet.parquet").unlink() + + +def test_create_table_from_multiple_parquet(duckdb): + # we use the two Parquet files generated by fixtures in conftest + duckdb.create_table_from_parquet( + schema=SCHEMA, table=TABLE_MULTIPLE_PARQUETS, path="*.parquet" + ) + df = duckdb.to_df(f"SELECT * FROM {SCHEMA}.{TABLE_MULTIPLE_PARQUETS}") # noqa: S608 + assert df.shape[0] == 6 + duckdb.drop_table(TABLE_MULTIPLE_PARQUETS, schema=SCHEMA) + duckdb.run_query(f"DROP SCHEMA {SCHEMA}") + + +def test_check_if_table_exists(duckdb, TEST_PARQUET_FILE_PATH): + assert not duckdb._check_if_table_exists(table=TABLE, schema=SCHEMA) + duckdb.create_table_from_parquet( + schema=SCHEMA, table=TABLE, path=TEST_PARQUET_FILE_PATH + ) + assert duckdb._check_if_table_exists(TABLE, schema=SCHEMA) + duckdb.drop_table(TABLE, schema=SCHEMA) + + +def test_run_query_query_with_comments(duckdb, TEST_PARQUET_FILE_PATH): + duckdb.create_table_from_parquet( + schema=SCHEMA, table=TABLE, path=TEST_PARQUET_FILE_PATH + ) + output1 = 
duckdb.run_query( + query=f""" + --test + SELECT * FROM {SCHEMA}.{TABLE} + """, # noqa: S608 + fetch_type="dataframe", + ) + assert isinstance(output1, pd.DataFrame) + + output2 = duckdb.run_query( + query=f""" + SELECT * FROM {SCHEMA}.{TABLE} + WHERE country = 'italy' + """, # noqa: S608 + fetch_type="dataframe", + ) + assert isinstance(output2, pd.DataFrame) + + output3 = duckdb.run_query( + query=f""" + SELECT * FROM {SCHEMA}.{TABLE} + ---test + """, # noqa: S608 + fetch_type="dataframe", + ) + assert isinstance(output3, pd.DataFrame) + + duckdb.drop_table(TABLE, schema=SCHEMA) diff --git a/tests/unit/test_file.xlsx b/tests/unit/test_file.xlsx index 197881d2b04cd201a0c54c19855133383f6debae..4a03e058ad12b7605c54ac7b45fcf029122abea1 100644 GIT binary patch literal 11067 zcmeHN1z42bwx+v#2myhS)PX@7N$C`&k!Cy!cbzRQlKOP4nm|~nTZHR4PbSSY$q2=Wp7tq#aHbioC?dl=ZiM?6$HmI}|q{bf;S_CO~#2F5T9l!`ZYhdCkb6knlh?_eA66Q%LAp@^E8=JeDBeFq#adza#tVz5JpRI`&5w( zoLy~1OhBC5IhP;uExV6#Ad*CR%tPx-VsMOnU^1Qe9`%E=pwcOPtA{r@7$JZwhPh&m zX+%o&OaPtdoP2Mh?NMFFpl;g)(Pwxn#b(w@G1s<$BH+_HsIi38cdv3&L@KY&whErZ zxrrj?cCL?%EEYviC-b{XZkOvS$)cj68kqSY!^6R$BErEb{j;Iu`2SG;t3nC>|4{zf zP&!%Ln%kKg8{6C6yZZB!;1jxvT9V|b(~5;t@I^ymf)|k6@16zcY;@=3R8DHg^>Iex8TR_y7N;(GzPY<`kj~2I1p!HM}dEcnHNq3I+^|%`b7Z0}#s?T;E zIYtt7x?)SZ-wb9Q4Sv_1Ecn(r;&`xe+-$UCCYYQwl~h`j_-Un7A*Q5daOMondWfTS z_3Wf&pg;gbwRsmP;!5B9rccEFT8m9-i za}Tmp?SHI>&DNWqwqS9#eLUb)O)Iofb@;-ja`@b4XlM?U9FAKsq-IUsdicF4Y|L)* z`=ZB*i^~yYJ2oPJ6H?#B4$yVDV2Hl3P6s9VZW(0@xSdyTm6<@o>DVmeK8xSF8wpLT zruEQBrp*6P9vhN-bZ^b4d(ZZu^I~q|)Yieyy`uU#HO^_(;q#(eK@cx*g1}p+gO+;I zoj2W+L#XAk+nc$ApU13UKDby-KO0T{jxg2j;#fQKA?6}(N)aCiw`XY+G88*TtrmNs zk!XF-@$>APgj?YkRaGvmcdfrm2x;k9eLr_6TkqY$!Ho&YJ*kIOhd4Q%jjxA0j}4u@ zccMz_DJy}d?X52Ay>RMj#>%IBux7(OV%d*doztze@qO9EX?Lf6*YwAc<8VN*Q!h)? z5V6pt+Nav!Lx+}?o#0({_L04lVf*yVxssS4(9DbC+1AwrDyEdWZX$E8YmljbfhYK))E5@cR*Zg zdW@-i-fL2@wSas0bf%NKwzer&h|A6Io^J2u#q9Ee;O0jz*JqRpB)lJ=-7YQW^18K2rtB%f)@@$jc92dHvCKBxV$eC9KbC(!o^a)U-!rp$Iu`Ajnod8k=}+<;L6 zaDX%Z%4E)=16@gw8#1Z_4&0Cb&~7iEd1(S9boCke1Hqi}DeYMDCZ-%>P=I^dic^An zF7JX{H`A1b!jY9-NBJ9-x~aq;*h|ryJX!9YybGaE8{rcpeLO=vp`LF%(>!xMOW>-7 z^~lgz5xvZ43bBz|y&CxN;!qJzg!O39SdqMfXbSO>B)l7B@Z!)B_(k*>(O8ka-qREk zBei-rXyV0TBDjg@fzVh{yxM6B0Z0-)4bSl6uo0?7^mx!%QN31Z3aOA(5!^)el+ai)yrgLh znUEy>8glUBh!Ltq_0-W=F}=)a3)zrb{Tiz9;z$urMD_I0Sh2iWpX@Ztan z{9;Csz;&$1B-&!IxTAkt58epi_TMoc@1UN;HL<)Eo(??)AbFrQCW~%;#yjOj^1$=@ zXK?=UPy9Fhr_?%DP!eqsnBLLsr0<6;J1~9&%t7 zs6)KKKVFl#zemy`=9y5e{K~igFEEz z-PrB)VK<-U#iq;e4n&rtF{det8`$DJi7{Z13pb)6+Ks;NusA zK>qI;8+$;*02KaaJqX!4mQRv?AsEZi|Nop3ZJ^%YzkSi*!PMKU0(fU{-2~RF=$$#* zymYTPKRT_OHTih%tmg3Js_Uny9A+TLhJ&-n`J=9%?x%oqt-sQ>HMY3kW!=4eAJ!|2 z(FDRnnI?1&0J%ZG`2})rNcdW1+|rZE#I?9ZzuiCfCMuj` zq2Nl2l_}sa;{2GDQpzIOfTbx5elNCNla02t3h5i$+w&rwDaBwM1s&{>-8sKL_{~an zAUjqx{qytD+U6(-iMUQsLGT{orSg%Vy(V7*TOl({{rJ1e$F&$-^_V}EkJl4cZ$NMo z$$Fcf!w#SeX*{L-4+mYrPo9+PRJWq5f+Wh&SH^n1KHYX!PMZKM?eOY`vV94l%g;w# z^ezOJ2!8&^Hcq1&WfD75KbcI*GDbka#gM1r13n?dJfXda*$>@(=vP!q^bS8>M|uPZ zhxS+>!>?I$JX^OcRE2SnZVJBN7OPVqKi_M!iv3*#=f0HGp&(Y-1c5K6KVL8fuPD!l zl*fAyA8Oj9^rv~HA=vM$r0(kOa8<;HqRI3ChI z%ttkc+q$0>5c}g8ti_S+ z5vKM;js_|6uCcvrnrGs}zD2BrrT&`q*?vSd6Hm6pa^i3t`X=8tC-79dsa}D|JH^?7-bJNz=UM>m=r;{;5@0ZTAf)4YPR&Ya zDVQU$Gk!oW&!oq67R!psFh3M~+BK7taB@A4c>5Dz()R90J6_sZ{r-dGX`4))gwc@^ zwirT7Bh$wD)o9B7RSM&6-)!?vT^Z|&%Hxvg3*^guTMgg(z>EL~H;VE{`SypLyUM!X z<6IdgzS(FP+JuZ?LxddlFpxGVd~(u)2=lIX&uJI;sqtCj9mhM=66W;t0*De48iEN= zn=%9~4;n1ec)P%2ZpWyvC=SEX@OE?2KK15-sGmO4f%A?sGehhA^D4Y zlsPQ>^uerS=ogm4A2~GA7!D7U0#^iwK&a|bUC7ENSb_E}zC)PV9 
z7fuHuE59~)QsfGM83aH0J>t?oOS+#L+3%3daVL#Vp=07{)jsP^Q6Xl&0w(5!}|B65C1%u3m7kHDR?6dnuoK+8(JIyVZ`lxn^mx zNS;?Pnq0GdA@vqhz}g=prIPeu$Xg1!{DSOEqDPWm5p|xWSLItr@YloFTB#Y&c7(Vq z40KF#ecawU)15rp|3$i>_Tkm=ZB)a6^O=kaP0sEIxc9Vqm09T1gtoG`EOHz8hgEGm zbA-|(G7@%@c=~p%^9E+x0LzG5=rVCxi4G*2>@LB_jcEwM@VP%kyKmWXBTEjVB@t49 zXLRnA88#);yC=bQWPhytc8tQAXjU(0@mU~#evKi;SE`rzYjW%g|LzQYNVB=q&D7&Gp5a_JF zdv?b;(JVI8@g+y>PqrBlQ97<-8o}5m+pddYT@ibn2K~u4p);l+IvCq3{yf{R((U)y zrYa(y;7BK0{sMBWHI!vB|_YtU^DVMvx6eCpadX5D9$engDvf)|Q_B@nut-#1wkK zYA{uhe9VUL1=l<6xV&!!?)i3l-RsNLV`UrfmQ4pOZxedd;Em4F5zEpaze+{wnW|Q- zSb(vO#6Y>o{fcd=Mt#wLpKXLck9m7jV@uYMch|%=;X1wzhGomZ@5~ z_{|e)%_pm6tKg#TjK*OEF0=DGGB-_-+H#QLQr5N%7GBgPsmc^9~9k!L5t4_CLwhZ zi904)5NLiR?-DBdikjF`BGOO;96b}=IGSAFDiof|NmDJrW8&K$>`NG&5p_4J6b}!S z$P^afi^nv*NdF@y>Wf&kzvmCJcdu~^XKHoM@dfI}9^u>q>4?c*(2Q!Qq6jbGl@HaE zt06YIEipXEnx&X^btm`q5q`+7J!VI8KABsDl1+|NX@k?&Np-YrDvJt)RdLEJz|jU@q)2) zh-hoi`_uTuQAxl#bf*?!c{XRG-P4^}$`{eEdXXuULGUTQfEDhiII6wClIYq2W;S*X z#6f+?M0Wie-TSmxk7RIv1Vt5y$`bI(H`j8k201>9(x!i=es8F;g>sF3y7DxsV?JV9 zl+AFB*g#q3GXhbiQUJPw?OHlziM5L}^V-Td6Dq4#^#Zr!Yf%ji$j*KQ*RCrGriy2Q zwC=qo^*oj)DtvY)-8)TXJsAePsRsH&SqOk9%BaZG#pZ`kGtdIzR0v`ma!wTo1>G{T zqDHXC0O-~$N%>!>@LL_%fg_xP2wZ-bCrB0&ivaB(AX} zz>%7hyFN=2{CiVG*vr$y!vobtg`tf z(UfMVU2FX;p(dq+N^K#0(xKCD&pp=JO$?OI)|yc{_a)QyGk_YY)wU$4h8H^MZpgj7 zscBThvX^(DIM!Z#gnD_b8K-&aGs5nm<>UO(*-rG6Xm+Ohw#G&(_O_RIB`-Ja4Igwh1OoFojQ|IqC8UU|)0x=n0xTduL&M>)8 zbM&+IJ;EW$qrncuZdK*YA0FjjJgJHiRI{)H*9r`t!K!CewB_l<>IT^QqW-X&hP51* zw`A??T`Y{Rmj3avh}9Sf&t2p~c`Rfm|rkoa9?#Q@Lm%+}=t`!== zTDAv953n>R%O9b5V|`mH#ZIZ#?+Q^R|E|_29TJa{I0G+M4ym(?8hT*GkzhHr7T)#T z*vz0}gM)>)7-JUA}+p*x0`yDeO!>T3NwAfS?To`W-LRs z(l+wKMnZU03R_oR_=k%gk&+*_6Tdkt<+UN1KL50ln;5g;dirqKmQH0RNkv=Iblngk zta$o%8-cki@B?0_()2yff~oDqt-~KuS*0%%9AycWjGS3T__Pd!Gxw~Pl{!<23A)t} z!k^{uTOO_-77laNDODncz2AGmIUc^4UTQ6sbM$D5D2YOPi9vyj zZKE;T1jl)3Wd=v9SOS=U^g1FnyEkL*-U_C$Bd#$BAH`pQY3V-}NbViX-!5pHsN9+V^q3m=Rl&mCb>Ueki zG41;-aTDR!Mk%V$uWQn)3sSX|?+Y~Ve^&|AXkrMd*GwwgWMaye zjl91Y!ezC`+(NetPs0{Rw>Bn2fro0)Rrh#Dad~&#ZUd=dTLzNXSGmy*anK{ zr`SNnx)}r|!Vs0O5|Nq^<@GGOBP6pPw9XVUW|%FdLiCU|^+Vo4p35dwa?(ObzH_qi zh}^Oj%D9M?C#o~XF;{SswdU3%Af0O*Lc%fWvov=4YzHep9W<9)wZrPGp1d8L?S|Ks zixIku@2J--@6{uP@#S?YG$)cFkn`10QyAF!WlXyx-<(wzVbA^x)Qk4k)NEpbR6epo zPGwrgE>X1T4@ez{;Q|TLL`@ZOCQ`9RTVKm(uu%ik-sbAEY3fR&9{XvlZ^ao{^fr#O zqVITp*d~Tc5=L30xKCLUQdtX<<3lZ}19d&aS^SP0%I7S)6Y}_W44XJcxA+EyfuU!5 z487up4{hCi{<`>xZTk zvI8x!Z@nuMqZQ6F%Wc}&coc>i3q9I-nVA>sVt~7=Ps8%A%~sEpc%_jXerUf&5DrsW zlkHA1qh1S~!$+ojJneB-qCyo71O`5y4XAL@#R%!cYgE(+f=owz8%)oQE|&^Bf-H^g^^NrH_2&$@zJenNW(+t9W;sehpkmICATYbK zbK?hE28ON{Q^rqbRu%>(7AB^ic$_4&rKN?1oW-2wAVhQ-S7O_u%*B=f6ijsf#Y_@o z;GNZzdD74Piy2vq=_D*5{y^t+q$As+=O8H%M$zHXNu^8??@@s7;R&aCKSLYl&G~*&AEg>ppgQZfvJ>xs`T5(Rv1>ih#(b-maDb15@;)+4;2a z-m<=3I4yIkkhZ8kt6Oz1i20c~DyE<~KMwfUPy~#jsd(FO-G*(%3P}Injth7rIvC^u*Xduzu#|<{*Bwxhp ze33i0pF6YIK>%#H&Yx*Bp^~h6#yl>j^_$sX_(_T+L`hwasGxZ>ipFn?R(BNxwX__J zObWT9P&m8EnLlQ&8F~eNQJA@}3N-bDNLQrx@iNC2V3c0(FMTPfzXrJC%?O6Kk z{lGj2!QwU~cJWqf^K>Zre$LpM#sjefZ%Awf*Kln=_LnS<&soIP>E!P$?;UD$zR>h4 ztDhK9T#7+`F>*?kc_pj9O6?!hfMuV~RnrNNKm_;e$+YVaT3tSw_S^Bdo>Nnj{YOz` zVW0auC|8ovKaLUuYngus<*F6FiE>>;{_TopV5Kk&<)&zQ)mGj_xh}(AieJBm2>wl! 
z-<6+l0$$hIe(Tmlf}4Q9wA`C0*M+Lzy7duO#{YJ4*WCK0(A@;QE=c^=tt#T1fLCt) zZp^QD^uKkB4hH%+-MYfM+S+frb-i=B+%kR*M!*%|OkJY3d|zeYLrU&Ha2D)Q#!>qp~deer8_(Oymd8`Z{5 zoa^V#<>~Tku)w~eYsc!%iPtys-%cc PeArJItP+R=UcUPePzA8k delta 2614 zcmZ9N2{e>#8^^~C*~eg%C5>r@VI(F?vP>nr?E6|-GK41SC1NsDc7?|h;w{UNG9|Rw zr?Q1eWY1(9sYVjA<(qux{l2`EC`m=Hx`tuBEE~EIH)T#uZcsaAUN&1%KUY+%gS7Dx_gnu0#GB@rdYnl zp0b){R^3fk<|iFbr|Rr=n*JJZXZH=xX^RNE=9OyOU``)jVYc&ynWFLwO^In_lr;mn zo?4~HvNMTW7HG2PGcErtu6Nz+MM_RW4so#vmWB)rnkMZoa}h~jK|0X0=apqnq*DM` z2QEgQFQFZ15=6QE4Ku+OnMd09y11+TMbtE`fSba@6S%a^s2njZ>~J35kQcMZs(akw2y1lO=MgHh6^u1 z<`COYT5(T$c>Phvpk_-{>Y{LaTK<&`=lzoI))pScC6Z@7eqA_$eM@cHJG%HnYAmzH z^{O*CkkAfGy){#AK zS!V+^+&jOn_8gM4_v6UnuX#^yTr)7p5AZFd~9hp`I5I?oJ^e z^)n%Tc}`CKIT~L z=h#AYYq`^!_hj*#NHuOSc(;O2o z+KiAC9yIU6KB<+=bbO9r2h1}PR3}*r0)YtZn0Ll|M_PtN4;qz$ZIXXMCi0tW6UyWs zOHWUfW-X_b_bIkKGUR9hYgDf3I3TkRkusGBcyJjmf;XQRx*$3Um&@)&UkBw*{&lXRgflxwb ztrKP9V<-w#j^aD@MzJw5!klXx`f{PSlM6)s^scP0$4`rJw1{XOT?jWOIo z2e}CIMMJI3^{h!Er3!Mhrr)sD?@hw{V(=b_FfpW}pq8t&<6CZBFokKyI9^c$Kiwos zA}9{4HE*B?FDPDQ6b_7{32(eO%Ku`BwjDYbue`BqsIN z?h<_bVCCIa7pPRO-^??TzEtGwB*1((l(|ZIt@IOWb-+{ zMh3y11GECo*!U24PoIun=|C?>iV!7xA5PZShuUrh88WS520L$@3|+yES=XyZ*G%09 zic(3<=azI$x^hp&`^! zoSQO`+A6>dskMo1X0r0{7U}wnR(CWcLXp0Q^K*dulB4FE+`sewSnF|JKRf3%&VS|I ziEV~|9-s#yMoKdadZ7Y~vFy>-|3LB5SBx_rw98<>!~2>BMLjD!)~dxYi;FI!_IB(V&I`YmbD0Gw-NIV z@B5Jy0NbKfs20a3AY|r9X$E>PzeF9}jE@HV;;^>QZxAo1x?lha8_ck`Id%9h!M;Ewl;b%=cz&7OuXD@FW5#22Sm94%ERT}fPZ zaXaaQRuxj+Y=fnGCXB?EG}D-|XTJ1G;BF=J&6P%@Go{?3{_+*597*Pme=8T?4Y$}h zPUE$ijZ3w{@_(293}*4u48~(_m?xh4P2K!;LAeo|x*X(R)PW(SBB?y>IJ=j?4A`jS zh6Wx3Hm32n&E>sEsNAsL!9x|YNb}*fnK-l4_&25PYM-t>Wj7$l-?Ht=`&%ytN!TZ0F3}~Wex7#bfESgkp+hM_`vH|{u z{4S;*bVTM_&T`Wvk8!$?(-(8Ee&x$!1LKt@F6c6u=su6SC#~A5pQ}q2^D3KniDokt zaSE*k#~zY3qMYMzHZRUvz|-mH9?`vImyJE~wK)fuh3_%EEi(wgH&$%Y{ChYukOjU) z(DvgqJ}$3${JG5150fSr%I)aBKQ5=cLUFlxPr-A!iLp_Q#Wc=Wgk*AO0~&~*@M}GN zzS6zp=u}`G9;d1$t{^(h5>S`xUAhMyI6}yK5vj+5dGG6 z+0We(i7&5*wjIe(CGqb~o4XoSN4S3^6PmR3wJS2_k=9%X%eT9M-ORap0^_Qz)Y*2E z*e-GoFdX!M#9!s4fERM|GVF2Y^yknM|EKu3S2GUajEER;A0xb-k4B6XI1vK8g-Zb| znBCi-@Cs7ko1B2F0&@GPKj#U2$5B7If!hlEw?%(8AHf}>B_7~|g4DL?&sq`MA>!f( zB(T!kqCY((xkD5$0L&uy0=MJ^wyznDl>*la0{K{p?R|fe5#6zm3I$+_(ojASA1H@y K%)PMRfByrBtARNH diff --git a/tests/unit/test_genesys.py b/tests/unit/test_genesys.py new file mode 100644 index 000000000..84325b3be --- /dev/null +++ b/tests/unit/test_genesys.py @@ -0,0 +1,557 @@ +import json +import logging +from unittest.mock import AsyncMock, MagicMock, patch +import warnings + +import pandas as pd +import pytest +from viadot.exceptions import APIError, CredentialError +from viadot.sources import Genesys + + +warnings.filterwarnings("ignore", category=DeprecationWarning) + +variables = { + "credentials": { + "client_id": "test_client_id", + "client_secret": "test_client_secret", + }, + "request_headers": { + "token_type": "Bearer", + "access_token": "test_access_token", + }, + "headers": { + "Authorization": "Bearer access_token", + "Content-Type": "application/json", + }, + "entities": [ + { + "id": "report1", + "downloadUrl": "http://example.com/report1", + "status": "COMPLETED", + }, + { + "id": "report2", + "downloadUrl": "http://example.com/report2", + "status": "COMPLETED", + }, + ], + "entities_fail": [ + { + "id": "report1", + "downloadUrl": "http://example.com/report1", + "status": "FAILED", + }, + ], + "entities_run": [ + { + "id": "report1", + "downloadUrl": "http://example.com/report1", + "status": "RUNNING", + }, + ], + "content": b"id,name\n1,Report1\n2,Report2", + "expected_data": {"id": [1, 2], "name": 
["Report1", "Report2"]}, + "report_url": "http://example.com/report", + "data_to_merge": [ + { + "conversationId": "conv1", + "participants": [ + { + "externalContactId": "ext1", + "participantId": "part1", + "sessions": [ + { + "sessionId": "sess1", + } + ], + } + ], + } + ], + "expected_columns": [ + "conversationId", + "externalContactId", + "participantId", + "sessionId", + ], + "expected_data_to_merge": { + "conversationId": ["conv1"], + "externalContactId": ["ext1"], + "participantId": ["part1"], + "sessionId": ["sess1"], + }, + "mock_post_data_list": [{"data": "some data"}, {"data2": "some data2"}], + "mock_report": { + "conversations": [ + {"conversationId": "conv1", "participants": [{"sessionId": "sess1"}]} + ], + "totalHits": 100, + }, +} + + +@pytest.fixture() +def genesys(): + """Return Genesys instance.""" + return Genesys(credentials=variables["credentials"], verbose=True) + + +@patch("viadot.sources.genesys.get_source_credentials", return_value=None) +def test_init_no_credentials(mock_get_source_credentials): + """Test raise error without credentials.""" + with pytest.raises(CredentialError): + Genesys() + + mock_get_source_credentials.assert_called_once() + + +def test_init_invalid_environment(): + """Test Genesys invalid environment.""" + with pytest.raises(APIError): + Genesys( + credentials=variables["credentials"], + environment="invalid_environment", + ) + + +@patch("viadot.sources.genesys.handle_api_response") +def test_headers(mock_handle_api_response, genesys): + """Test Genesys `headers` property.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = variables["request_headers"] + mock_handle_api_response.return_value = mock_response + + headers = genesys.headers + assert headers["Authorization"] == "Bearer test_access_token" + + +@pytest.mark.skip(reason="Needs to be fixed.") +@patch("aiohttp.ClientSession.post", new_callable=AsyncMock) +@pytest.mark.asyncio() +async def test_api_call_post_success(mock_post, genesys): + """Test Genesys `_api_call()` method called with POST.""" + mock_response = AsyncMock() + mock_response.read.return_value = json.dumps({"key": "value"}).encode("utf-8") + mock_post.return_value.__aenter__.return_value = mock_response + + response = genesys._api_call( + endpoint="test_endpoint", + post_data_list=[{"data_key": "data_value"}], + method="POST", + ) + + assert response == {"key": "value"} + mock_post.assert_called_once_with( + f"https://api.{genesys.environment}/api/v2/test_endpoint", + headers=genesys.headers, + data=json.dumps({"data_key": "data_value"}), + ) + + +@pytest.mark.skip(reason="Needs to be fixed.") +@patch("aiohttp.ClientSession.post", new_callable=AsyncMock) +@pytest.mark.asyncio() +def test_api_call_get_success(mock_get, genesys): + """Test Genesys `_api_call()` method called with GET.""" + mock_response = AsyncMock() + mock_response.read.return_value = json.dumps({"key": "value"}).encode("utf-8") + mock_get.return_value.__aenter__.return_value = mock_response + + response = genesys._api_call( + endpoint="test_endpoint", + post_data_list=[], + method="GET", + params={"param1": "value1"}, + ) + + assert response == {"key": "value"} + mock_get.assert_called_once_with( + f"https://api.{genesys.environment}/api/v2/test_endpoint", + headers=genesys.headers, + params={"param1": "value1"}, + ) + + +@pytest.mark.skip(reason="Needs to be fixed.") +@patch("aiohttp.ClientSession.post", new_callable=AsyncMock) +@pytest.mark.asyncio() +async def test_api_call_post_failure(mock_post, 
genesys): + """Test Genesys `_api_call` method failing when called with POST.""" + mock_response = AsyncMock() + mock_response.read.return_value = b"Bad Request" + mock_response.status = 400 + mock_post.return_value.__aenter__.return_value = mock_response + + with pytest.raises(APIError) as context: + genesys._api_call( + endpoint="test_endpoint", + post_data_list=[{"data_key": "data_value"}], + method="POST", + ) + + assert "API call failed" in str(context.exception) + mock_post.assert_called_once_with( + f"https://api.{genesys.environment}/api/v2/test_endpoint", + headers=genesys.headers, + data=json.dumps({"data_key": "data_value"}), + ) + + +@patch("viadot.sources.genesys.handle_api_response") +@patch.object(Genesys, "headers", new_callable=MagicMock) +def test_load_reporting_exports(mock_auth_token, mock_handle_api_response, genesys): + """Test Genesys `_load_reporting_exports` method.""" + mock_auth_token.return_value = variables["headers"] + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"entities": []} + mock_handle_api_response.return_value = mock_response + + result = genesys._load_reporting_exports() + assert result == {"entities": []} + + +@patch("viadot.sources.genesys.handle_api_response") +@patch.object(Genesys, "headers", new_callable=MagicMock) +def test_load_reporting_exports_failure( + mock_auth_token, mock_handle_api_response, genesys +): + """Test Genesys `_load_reporting_exports` method failing.""" + mock_auth_token.return_value = variables["headers"] + + mock_response = MagicMock() + mock_response.status_code = 400 + mock_handle_api_response.return_value = mock_response + + with pytest.raises(APIError) as e: + genesys._load_reporting_exports() + + assert "Failed to loaded all exports." in str(e) + + +def test_get_reporting_exports_url(genesys, caplog): + """Test Genesys `_get_reporting_exports_url` method.""" + entities = variables["entities"] + + with caplog.at_level(logging.INFO): + actual_ids, actual_urls = genesys._get_reporting_exports_url(entities) + msg = "Report status:\n\treport1 -> COMPLETED \n\treport2 -> COMPLETED \n" + assert msg in caplog.text + + expected_ids = ["report1", "report2"] + expected_urls = ["http://example.com/report1", "http://example.com/report2"] + + assert expected_ids == actual_ids + assert expected_urls == actual_urls + + +def test_get_reporting_exports_url_with_failed_status(genesys, caplog): + """Test Genesys `_get_reporting_exports_url` method FAILED status.""" + entities = variables["entities_fail"] + + with caplog.at_level(logging.ERROR): + actual_ids, actual_urls = genesys._get_reporting_exports_url(entities) + msg = "Some reports have not been successfully created." + assert msg in caplog.text + + expected_ids = ["report1"] + expected_urls = ["http://example.com/report1"] + + assert expected_ids == actual_ids + assert expected_urls == actual_urls + + +def test_get_reporting_exports_url_with_running_status(genesys, caplog): + """Test Genesys `_get_reporting_exports_url` method RUNNING status.""" + entities = variables["entities_run"] + + with caplog.at_level(logging.WARNING): + actual_ids, actual_urls = genesys._get_reporting_exports_url(entities) + msg = "Some reports are still being created and can not be downloaded." 
+ assert msg in caplog.text + + expected_ids = ["report1"] + expected_urls = ["http://example.com/report1"] + assert expected_ids == actual_ids + assert expected_urls == actual_urls + + +@patch("viadot.sources.genesys.handle_api_response") +def test_download_report_success(mock_handle_api_response, genesys, caplog): + """Test Genesys `_download_report` method.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.content = variables["content"] + mock_handle_api_response.return_value = mock_response + + expected_data = variables["expected_data"] + expected_df = pd.DataFrame(expected_data) + + report_url = variables["report_url"] + + with caplog.at_level(logging.INFO): + actual_df = genesys._download_report(report_url) + msg = "Successfully downloaded report from Genesys API ('http://example.com/report')." + assert msg in caplog.text + + assert expected_df.equals(actual_df) + + +@patch("viadot.sources.genesys.handle_api_response") +def test_download_report_failure(mock_handle_api_response, genesys, caplog): + """Test Genesys `_download_report` method failure.""" + mock_response = MagicMock() + mock_response.status_code = 404 + mock_response.content = b"Not Found" + mock_handle_api_response.return_value = mock_response + + report_url = variables["report_url"] + + with caplog.at_level(logging.ERROR): + genesys._download_report(report_url) + msg = "Failed to download report from Genesys API ('http://example.com/report'). - b'Not Found'" + assert msg in caplog.text + + +@patch("viadot.sources.genesys.handle_api_response") +def test_download_report_drop_duplicates(mock_handle_api_response, genesys): + """Test Genesys `_download_report` method, dropping duplicates.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.content = variables["content"] + mock_handle_api_response.return_value = mock_response + + expected_data = variables["expected_data"] + expected_df = pd.DataFrame(expected_data) + + report_url = variables["report_url"] + actual_df = genesys._download_report( + report_url, + drop_duplicates=True, + ) + + assert expected_df.equals(actual_df) + + +@patch("viadot.sources.genesys.handle_api_response") +def test_delete_report_success(mock_handle_api_response, genesys, caplog): + """Test Genesys `_delete_report` method.""" + mock_response = MagicMock() + mock_response.status_code = 204 + mock_handle_api_response.return_value = mock_response + + report_id = "123456" + + with caplog.at_level(logging.INFO): + genesys._delete_report(report_id) + + msg = f"Successfully deleted report '{report_id}' from Genesys API." + assert msg in caplog.text + + +@patch("viadot.sources.genesys.handle_api_response") +def test_delete_report_failure(mock_handle_api_response, caplog, genesys): + """Test Genesys `_delete_report` method failure.""" + mock_response = MagicMock() + mock_response.status_code = 404 + mock_response.content = b"Not Found" + mock_handle_api_response.return_value = mock_response + + report_id = "123456" + + with caplog.at_level(logging.ERROR): + genesys._delete_report(report_id) + + msg = f"Failed to delete report '{report_id}' from Genesys API. 
- b'Not Found'" + assert msg in caplog.text + + +def test_merge_conversations(genesys): + """Test Genesys `_merge_conversations` function.""" + mock_data = variables["data_to_merge"] + expected_columns = variables["expected_columns"] + expected_data = variables["expected_data_to_merge"] + + result_df = genesys._merge_conversations(mock_data) + + assert list(result_df.columns) == expected_columns + for col, expected_values in expected_data.items(): + assert list(result_df[col]) == expected_values + + +@patch("viadot.sources.genesys.Genesys._api_call") +@patch("viadot.sources.genesys.Genesys._load_reporting_exports") +@patch("viadot.sources.genesys.Genesys._get_reporting_exports_url") +@patch("viadot.sources.genesys.Genesys._download_report") +@patch("viadot.sources.genesys.Genesys._delete_report") +def test_api_connection_reporting_exports( + mock_delete, mock_download, mock_get_url, mock_load, mock_api_call, genesys +): + """Test Genesys `api_connection` method with reporting exports.""" + mock_entities = variables["entities"] + mock_load.return_value = {"entities": mock_entities} + mock_get_url.return_value = ( + ["report1", "report2"], + ["http://example.com/report1", "http://example.com/report2"], + ) + mock_download.return_value = pd.DataFrame( + {"Queue Id": ["queue1;queue2"], "data": [1]} + ) + + genesys.api_connection( + endpoint="analytics/reporting/exports", + post_data_list=variables["mock_post_data_list"], + view_type="queue_performance_detail_view", + view_type_time_sleep=0.5, + ) + + mock_api_call.assert_called_once_with( + endpoint="analytics/reporting/exports", + post_data_list=variables["mock_post_data_list"], + method="POST", + ) + mock_load.assert_called_once() + mock_get_url.assert_called_once() + mock_download.assert_called() + mock_delete.assert_called() + + +@patch("viadot.sources.genesys.Genesys._api_call") +@patch("viadot.sources.genesys.Genesys._merge_conversations") +def test_api_connection_conversations(mock_merge, mock_api_call, genesys): + """Test Genesys `api_connection` method with conversations details.""" + mock_post_data_list = [{"paging": {"pageNumber": 1}}] + mock_report = variables["mock_report"] + mock_merge.return_value = pd.DataFrame({"conversationId": ["conv1"], "data": [1]}) + mock_api_call.side_effect = [mock_report, mock_report] + + genesys.api_connection( + endpoint="analytics/conversations/details/query", + post_data_list=mock_post_data_list, + ) + + mock_api_call.assert_called() + mock_merge.assert_called() + + +@patch("viadot.sources.genesys.Genesys._api_call") +def test_api_connection_routing_queues_members(mock_api_call, genesys): + """Test Genesys `api_connection` method with routing queues.""" + mock_queues_ids = ["queue1"] + mock_response_page_1 = {"entities": [{"userId": "user1", "name": "Agent1"}]} + mock_response_page_2 = {"entities": []} + mock_api_call.side_effect = [mock_response_page_1, mock_response_page_2] + + genesys.api_connection( + endpoint="routing_queues_members", queues_ids=mock_queues_ids + ) + + assert mock_api_call.call_count == 2 + + +@patch("viadot.sources.genesys.Genesys._api_call") +def test_api_connection_users(mock_api_call, genesys): + """Test Genesys `api_connection` method with users.""" + mock_queues_ids = ["queue1"] + mock_response_page_1 = {"entities": [{"userId": "user1", "name": "Agent1"}]} + mock_response_page_2 = {"entities": []} + mock_api_call.side_effect = [mock_response_page_1, mock_response_page_2] + + genesys.api_connection(endpoint="users", queues_ids=mock_queues_ids) + + assert 
mock_api_call.call_count == 2 + + +@patch("viadot.sources.genesys.Genesys._handle_if_empty") +@patch("viadot.sources.genesys.super") +def test_to_df(mock_super, mock_handle_if_empty, genesys): + """Test Genesys `to_df` method.""" + mock_super().to_df = MagicMock() + mock_handle_if_empty = MagicMock() + genesys.data_returned = { + 0: pd.DataFrame({"A": [1, 2], "B": [3, 4]}), + 1: pd.DataFrame({"A": [2, 3], "B": [4, 5]}), + } + + result_df = genesys.to_df() + result_df.drop( + columns=["_viadot_source", "_viadot_downloaded_at_utc"], + inplace=True, + axis=1, + ) + expected_df = pd.DataFrame({"A": [1, 2, 2, 3], "B": [3, 4, 4, 5]}) + + assert result_df.equals(expected_df) + mock_super().to_df.assert_called_once() + mock_handle_if_empty.assert_not_called() + + +@patch("viadot.sources.genesys.Genesys._handle_if_empty") +@patch("viadot.sources.genesys.super") +def test_to_df_duplicates(mock_super, mock_handle_if_empty, genesys): + """Test Genesys `to_df` method, dropping duplicates.""" + mock_super().to_df = MagicMock() + mock_handle_if_empty = MagicMock() + genesys.data_returned = { + 0: pd.DataFrame({"A": [1, 2], "B": [3, 4]}), + 1: pd.DataFrame({"A": [2, 3], "B": [4, 5]}), + } + + expected_df_no_duplicates = pd.DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]}) + result_df_no_duplicates = genesys.to_df(drop_duplicates=True) + result_df_no_duplicates.drop( + columns=["_viadot_source", "_viadot_downloaded_at_utc"], + inplace=True, + axis=1, + ) + + assert result_df_no_duplicates.equals(expected_df_no_duplicates) + mock_super().to_df.assert_called_once() + mock_handle_if_empty.assert_not_called() + + +@patch("viadot.sources.genesys.validate") +@patch("viadot.sources.genesys.Genesys._handle_if_empty") +@patch("viadot.sources.genesys.super") +def test_to_df_validate(mock_super, mock_handle_if_empty, mock_validate, genesys): + """Test Genesys `to_df` method, checking validation function.""" + mock_super().to_df = MagicMock() + mock_handle_if_empty = MagicMock() + genesys.data_returned = { + 0: pd.DataFrame({"A": [1, 2], "B": [3, 4]}), + 1: pd.DataFrame({"A": [2, 3], "B": [4, 5]}), + } + + validate_df_dict = {"some_key": "some_value"} + result_df = genesys.to_df(validate_df_dict=validate_df_dict) + result_df.drop( + columns=["_viadot_source", "_viadot_downloaded_at_utc"], + inplace=True, + axis=1, + ) + + mock_validate.assert_called_once_with(df=result_df, tests=validate_df_dict) + mock_super().to_df.assert_called_once() + mock_handle_if_empty.assert_not_called() + + +@patch("viadot.sources.genesys.Genesys._handle_if_empty") +@patch("viadot.sources.genesys.super") +def test_to_df_empty(mock_super, mock_handle_if_empty, genesys): + """Test Genesys `to_df` method, checking empty response.""" + mock_super().to_df = MagicMock() + mock_handle_if_empty = MagicMock() + genesys.data_returned = { + 0: pd.DataFrame({"A": [1, 2], "B": [3, 4]}), + 1: pd.DataFrame({"A": [2, 3], "B": [4, 5]}), + } + + genesys.data_returned = {} + result_df_empty = genesys.to_df() + + assert result_df_empty.empty + mock_super().to_df.assert_called_once() + mock_handle_if_empty.assert_not_called() diff --git a/tests/unit/test_hubspot.py b/tests/unit/test_hubspot.py new file mode 100644 index 000000000..5ecf74fbc --- /dev/null +++ b/tests/unit/test_hubspot.py @@ -0,0 +1,214 @@ +from datetime import datetime +from io import StringIO +import json +import unittest +from unittest.mock import MagicMock, patch + +import pandas as pd +import pytest +from requests.models import Response +from viadot.exceptions import APIError, CredentialError +from 
viadot.sources import Hubspot +from viadot.sources.hubspot import HubspotCredentials + + +variables = { + "credentials": {"token": "fake_token"}, + "filters": [ + { + "filters": [ + { + "propertyName": "createdate", + "operator": "GTE", + "value": "2021-01-01", + } + ] + } + ], +} + + +class TestHubspotCredentials: + """Test Hubspot Credentials Class.""" + + def test_hubspot_credentials(self): + """Test Hubspot credentials.""" + HubspotCredentials(token="test_token") # noqa: S106 + + +class TestHubspot(unittest.TestCase): + """Test Hubspot Class.""" + + @classmethod + def setUpClass(cls): # noqa: ANN206 + """Defined based Hubspot Class for the rest of test.""" + cls.hubspot_instance = Hubspot(credentials=variables["credentials"]) + + @patch("viadot.sources.hubspot.get_source_credentials", return_value=None) + def test_init_no_credentials(self, mock_get_source_credentials): + """Test raise error without credentials.""" + with pytest.raises(CredentialError): + Hubspot() + + mock_get_source_credentials.assert_called_once() + + def test_date_to_unixtimestamp(self): + """Test Hubspot `_date_to_unixtimestamp` function.""" + date_str = "2021-01-01" + expected_timestamp = int( + datetime.strptime(date_str, "%Y-%m-%d").timestamp() * 1000 + ) + result = self.hubspot_instance._date_to_unixtimestamp(date_str) + assert result == expected_timestamp + + def test_get_api_url(self): + """Test Hubspot `_get_api_url` function.""" + endpoint = "deals" + filters = None + properties = ["property1", "property2"] + expected_url = ( + f"https://api.hubapi.com/crm/v3/objects/{endpoint}/" + + "?limit=100&properties=property1,property2&" + ) + result = self.hubspot_instance._get_api_url( + endpoint=endpoint, filters=filters, properties=properties + ) + assert result == expected_url + + def test_format_filters(self): + """Test Hubspot `_format_filters` function.""" + filters = variables["filters"] + formatted_filters = self.hubspot_instance._format_filters(filters) + assert isinstance(formatted_filters, list) + + def test_get_api_body(self): + """Test Hubspot `_get_api_body` function.""" + filters = variables["filters"] + expected_body = json.dumps({"filterGroups": filters, "limit": 100}) + result = self.hubspot_instance._get_api_body(filters) + + assert result == expected_body + + @patch("viadot.sources.hubspot.handle_api_response") + def test_api_call_success(self, mock_handle_api_response): + """Test Hubspot `_api_call` method.""" + mock_response = MagicMock(spec=Response) + mock_response.status_code = 200 + mock_response.json.return_value = {"results": [{"id": "123"}]} + mock_handle_api_response.return_value = mock_response + + url = "https://api.hubapi.com/crm/v3/objects/deals/?limit=100&" + result = self.hubspot_instance._api_call(url=url, method="GET") + + assert result == {"results": [{"id": "123"}]} + mock_handle_api_response.assert_called_once() + + @patch("viadot.sources.hubspot.handle_api_response") + def test_api_call_error(self, mock_handle_api_response): + """Test Hubspot `_api_call` method failure.""" + mock_response = MagicMock(spec=Response) + mock_response.status_code = 500 + mock_response.content = b"Internal Server Error" + mock_handle_api_response.return_value = mock_response + + url = "https://api.hubapi.com/crm/v3/objects/deals/?limit=100&" + + with pytest.raises(APIError): + self.hubspot_instance._api_call(url=url, method="GET") + + def test_get_offset_from_response(self): + """Test Hubspot `_get_offset_from_response` function.""" + response_with_paging = {"paging": {"next": {"after": "123"}}} 
+ response_with_offset = {"offset": "456"} + + offset_type, offset_value = self.hubspot_instance._get_offset_from_response( + response_with_paging + ) + assert (offset_type, offset_value) == ("after", "123") + + offset_type, offset_value = self.hubspot_instance._get_offset_from_response( + response_with_offset + ) + assert (offset_type, offset_value) == ("offset", "456") + + offset_type, offset_value = self.hubspot_instance._get_offset_from_response({}) + assert (offset_type, offset_value) == (None, None) + + @patch("viadot.sources.hubspot.handle_api_response") + def test_api_connection_with_filters(self, mock_handle_api_response): + """Test Hubspot `api_connection` method, with filters.""" + mock_response = MagicMock(spec=Response) + mock_response.status_code = 200 + mock_response.json.return_value = {"results": [{"id": "123"}]} + mock_handle_api_response.return_value = mock_response + + endpoint = "deals" + filters = variables["filters"] + properties = ["property1"] + self.hubspot_instance.api_connection( + endpoint=endpoint, filters=filters, properties=properties + ) + + assert self.hubspot_instance.full_dataset is not None + assert len(self.hubspot_instance.full_dataset) > 0 + + @patch("viadot.sources.hubspot.handle_api_response") + def test_api_connection_without_filters(self, mock_handle_api_response): + """Test Hubspot `api_connection` method, without filters.""" + mock_response = MagicMock(spec=Response) + mock_response.status_code = 200 + mock_response.json.return_value = {"results": [{"id": "123"}]} + mock_handle_api_response.return_value = mock_response + + endpoint = "deals" + filters = None + properties = ["property1"] + self.hubspot_instance.api_connection( + endpoint=endpoint, filters=filters, properties=properties + ) + + assert self.hubspot_instance.full_dataset is not None + assert len(self.hubspot_instance.full_dataset) > 0 + + @patch("viadot.sources.hubspot.super") + def test_to_df(self, mock_super): + """Test Hubspot `to_df` function.""" + mock_super().to_df = MagicMock() + self.hubspot_instance.full_dataset = [{"id": "123"}] + result_df = self.hubspot_instance.to_df() + result_df.drop( + columns=["_viadot_source", "_viadot_downloaded_at_utc"], + inplace=True, + axis=1, + ) + + expected_df = pd.DataFrame([{"id": "123"}]) + assert result_df.equals(expected_df) + mock_super().to_df.assert_called_once() + + @patch("viadot.sources.hubspot.pd.read_json") + @patch("viadot.sources.hubspot.super") + def test_to_df_empty(self, mock_super, mock_read_json): + """Test Hubspot `to_df` method, checking emptiness.""" + mock_super().to_df = MagicMock() + mock_read_json.return_value = pd.DataFrame() + self.hubspot_instance.full_dataset = StringIO("{}") + + with patch.object( + self.hubspot_instance, "_handle_if_empty" + ) as mock_handle_if_empty: + result_df = self.hubspot_instance.to_df() + result_df.drop( + columns=["_viadot_source", "_viadot_downloaded_at_utc"], + inplace=True, + axis=1, + ) + mock_handle_if_empty.assert_called_once_with( + if_empty="warn", message="The response does not contain any data." 
+ ) + assert result_df.empty + mock_super().to_df.assert_called_once() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_mindful.py b/tests/unit/test_mindful.py new file mode 100644 index 000000000..022852241 --- /dev/null +++ b/tests/unit/test_mindful.py @@ -0,0 +1,146 @@ +from datetime import date, timedelta +from io import StringIO +import unittest +from unittest.mock import MagicMock, patch + +import pandas as pd +import pytest +from requests.auth import HTTPBasicAuth +from requests.models import Response +from viadot.exceptions import APIError, CredentialError +from viadot.sources import Mindful +from viadot.sources.mindful import MindfulCredentials + + +variables = {"credentials": {"customer_uuid": "fake_uuid", "auth_token": "fake_token"}} + + +class TestMindfulCredentials: + """Test Mindful Credentials Class.""" + + def test_mindful_credentials(self): + """Test Mindful credentials.""" + MindfulCredentials( + customer_uuid="test_customer_uuid", + auth_token="test_auth_token", # noqa: S106 + ) + + +class TestMindful(unittest.TestCase): + """Test Mindful Class.""" + + @classmethod + def setUpClass(cls): # noqa: ANN206 + """Defined based Mindful Class for the rest of test.""" + cls.mindful_instance = Mindful(credentials=variables["credentials"]) + + @patch("viadot.sources.mindful.get_source_credentials", return_value=None) + def test_init_no_credentials(self, mock_get_source_credentials): + """Test raise error without credentials.""" + with pytest.raises(CredentialError): + Mindful() + + mock_get_source_credentials.assert_called_once() + + @patch("viadot.sources.mindful.handle_api_response") + def test_mindful_api_response(self, mock_handle_api_response): + """Test Mindful `_mindful_api_response` method.""" + mock_response = MagicMock(spec=Response) + mock_handle_api_response.return_value = mock_response + + self.mindful_instance._mindful_api_response(endpoint="interactions") + mock_handle_api_response.assert_called_once_with( + url="https://eu1.surveydynamix.com/api/interactions", + params=None, + method="GET", + auth=unittest.mock.ANY, + ) + + auth_arg = mock_handle_api_response.call_args[1]["auth"] + assert isinstance(auth_arg, HTTPBasicAuth) + assert auth_arg.username == variables["credentials"]["customer_uuid"] + assert auth_arg.password == variables["credentials"]["auth_token"] + + @patch("viadot.sources.mindful.handle_api_response") + def test_api_connection(self, mock_handle_api_response): + """Test Mindful `api_connection` method.""" + mock_response = MagicMock(spec=Response) + mock_response.status_code = 200 + mock_response.content = b'{"data": "some_data"}' + mock_handle_api_response.return_value = mock_response + + date_interval = [date.today() - timedelta(days=1), date.today()] + self.mindful_instance.api_connection( + endpoint="responses", date_interval=date_interval + ) + + mock_handle_api_response.assert_called_once() + assert isinstance(self.mindful_instance.data, StringIO) + + @patch("viadot.sources.mindful.handle_api_response") + def test_api_connection_no_data(self, mock_handle_api_response): + """Test Mindful `api_connection` method without data.""" + mock_response = MagicMock(spec=Response) + mock_response.status_code = 204 + mock_response.content = b"" + mock_handle_api_response.return_value = mock_response + + date_interval = [date.today() - timedelta(days=1), date.today()] + self.mindful_instance.api_connection( + endpoint="responses", date_interval=date_interval + ) + + mock_handle_api_response.assert_called_once() + assert 
self.mindful_instance.data == "{}" + + @patch("viadot.sources.mindful.handle_api_response") + def test_api_connection_error(self, mock_handle_api_response): + """Test Mindful `api_connection` method, APIError.""" + mock_response = MagicMock(spec=Response) + mock_response.status_code = 500 + mock_response.content = b"Internal Server Error" + mock_handle_api_response.return_value = mock_response + + with pytest.raises(APIError): + self.mindful_instance.api_connection(endpoint="responses") + + @patch("viadot.sources.mindful.pd.read_json") + @patch("viadot.sources.mindful.super") + def test_to_df(self, mock_super, mock_read_json): + """Test Mindful `to_df` method.""" + mock_super().to_df = MagicMock() + mock_read_json.return_value = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) + self.mindful_instance.data = StringIO('{"A": [1, 2], "B": [3, 4]}') + + result_df = self.mindful_instance.to_df() + result_df.drop( + columns=["_viadot_source", "_viadot_downloaded_at_utc"], + inplace=True, + axis=1, + ) + expected_df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) + + assert result_df.equals(expected_df) + mock_super().to_df.assert_called_once() + + @patch("viadot.sources.mindful.pd.read_json") + @patch("viadot.sources.mindful.super") + def test_to_df_empty(self, mock_super, mock_read_json): + """Test Mindful `to_df` method, checking emptiness.""" + mock_super().to_df = MagicMock() + mock_read_json.return_value = pd.DataFrame() + self.mindful_instance.data = StringIO("{}") + + with patch.object( + self.mindful_instance, "_handle_if_empty" + ) as mock_handle_if_empty: + result_df = self.mindful_instance.to_df() + mock_handle_if_empty.assert_called_once_with( + if_empty="warn", message="The response does not contain any data." + ) + assert result_df.empty + mock_super().to_df.assert_called_once() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_outlook.py b/tests/unit/test_outlook.py new file mode 100644 index 000000000..c83d0b293 --- /dev/null +++ b/tests/unit/test_outlook.py @@ -0,0 +1,184 @@ +from datetime import datetime, timezone +import unittest +from unittest.mock import MagicMock, patch + +from O365.mailbox import MailBox +from O365.message import Message +import pandas as pd +import pytest +from viadot.exceptions import CredentialError +from viadot.sources import Outlook +from viadot.sources.outlook import OutlookCredentials + + +variables = { + "credentials": { + "client_id": "fake_client_id", + "client_secret": "fake_client_secret", + "tenant_id": "fake_tenant_id", + }, + "response_1": { + "from": {"emailAddress": {"address": "sender@example.com"}}, + "toRecipients": [{"emailAddress": {"address": "recipient@example.com"}}], + "receivedDateTime": "2023-07-18T12:34:56Z", + "conversationId": "12345", + }, + "expected_1": { + "(sub)folder": "Inbox", + "conversation ID": "12345", + "conversation index": "ConversationIndex", + "categories": "Category1, Category2", + "sender": "sender@example.com", + "subject": "Test Subject", + "recivers": "recipient@example.com", + "received_time": "2023-07-18T12:34:56Z", + "mail_adress": "test", + "Inbox": True, + }, +} + + +class TestOutlookCredentials: + """Test Outlook Credentials Class.""" + + def test_outlook_credentials(self): + """Test Outlook credentials.""" + OutlookCredentials( + client_id="test_client_id", + client_secret="test_client_secret", # noqa: S106 + tenant_id="test_tenant_id", + ) + + +class TestOutlook(unittest.TestCase): + """Test Outlook Class.""" + + @classmethod + def setUpClass(cls): # noqa: ANN206 + """Defined based 
Outlook Class for the rest of test.""" + cls.outlook_instance = Outlook(credentials=variables["credentials"]) + + @patch("viadot.sources.outlook.get_source_credentials", return_value=None) + def test_missing_credentials(self, mock_get_source_credentials): + """Test raise error without credentials.""" + with pytest.raises(CredentialError): + Outlook(credentials=None) + + mock_get_source_credentials.assert_called_once() + + @patch("O365.Account.mailbox") + def test_get_messages_from_mailbox(self, mock_mailbox): + """Test Outlook `_get_messages_from_mailbox` function.""" + mock_mailbox_obj = MagicMock(spec=MailBox) + mock_mailbox_obj.name = "Inbox" + + mock_message = MagicMock(spec=Message) + mock_message.received = "2023-07-18T12:34:56+00:00" + mock_message.to_api_data.return_value = variables["response_1"] + mock_message.subject = "Test Subject" + mock_message.categories = ["Category1", "Category2"] + mock_message.conversation_index = "ConversationIndex" + + mock_mailbox_obj.get_messages.return_value = [mock_message] + mock_mailbox.return_value = mock_mailbox_obj + + date_range_start_time = datetime(2023, 7, 17, tzinfo=timezone.utc) + date_range_end_time = datetime(2023, 7, 19, tzinfo=timezone.utc) + + messages = self.outlook_instance._get_messages_from_mailbox( + mailbox_name="test@example.com", + dict_folder={"Inbox": mock_mailbox_obj}, + date_range_start_time=date_range_start_time, + date_range_end_time=date_range_end_time, + ) + + expected_message = variables["expected_1"] + assert messages == [expected_message] + + @patch("O365.Account.authenticate", return_value=True) + @patch("O365.Account.mailbox") + def test_api_connection(self, mock_mailbox, mock_authenticate): + """Test Outlook `api_connection` method.""" + mock_mailbox_obj = MagicMock(spec=MailBox) + mock_mailbox.return_value = mock_mailbox_obj + + self.outlook_instance._get_subfolders = MagicMock(return_value={}) + + mailbox_name = "test@example.com" + + self.outlook_instance.api_connection(mailbox_name=mailbox_name) + + self.outlook_instance._get_subfolders.assert_called_once_with( + {}, mock_mailbox_obj + ) + + mock_authenticate.assert_called_once() + + @patch("O365.Account.authenticate", return_value=False) + def test_api_connection_authentication_failure(self, mock_authenticate): # noqa: ARG002 + """Test Outlook `api_connection` method, failure.""" + mailbox_name = "test@example.com" + + with pytest.raises(ValueError): # noqa: PT011 + self.outlook_instance.api_connection(mailbox_name=mailbox_name) + + @patch("O365.Account.mailbox") + def test_to_df(self, mock_mailbox): + """Test Outlook `to_df` function.""" + mock_mailbox_obj = MagicMock(spec=MailBox) + mock_mailbox_obj.name = "Inbox" + + mock_message = MagicMock(spec=Message) + mock_message.received = "2023-07-18T12:34:56+00:00" + mock_message.to_api_data.return_value = { + "from": {"emailAddress": {"address": "sender@example.com"}}, + "toRecipients": [{"emailAddress": {"address": "recipient@example.com"}}], + "receivedDateTime": "2023-07-18T12:34:56Z", + "conversationId": "12345", + } + mock_message.subject = "Test Subject" + mock_message.categories = ["Category1", "Category2"] + mock_message.conversation_index = "ConversationIndex" + + mock_mailbox_obj.get_messages.return_value = [mock_message] + mock_mailbox.return_value = mock_mailbox_obj + + date_range_start_time = datetime(2023, 7, 17, tzinfo=timezone.utc) + date_range_end_time = datetime(2023, 7, 19, tzinfo=timezone.utc) + + self.outlook_instance.data = self.outlook_instance._get_messages_from_mailbox( + 
mailbox_name="test@example.com", + dict_folder={"Inbox": mock_mailbox_obj}, + date_range_start_time=date_range_start_time, + date_range_end_time=date_range_end_time, + ) + + df = self.outlook_instance.to_df() + df.drop( + columns=["_viadot_source", "_viadot_downloaded_at_utc"], + inplace=True, + axis=1, + ) + + expected_df = pd.DataFrame( + [ + { + "(sub)folder": "Inbox", + "conversation ID": "12345", + "conversation index": "ConversationIndex", + "categories": "Category1, Category2", + "sender": "sender@example.com", + "subject": "Test Subject", + "recivers": "recipient@example.com", + "received_time": "2023-07-18T12:34:56Z", + "mail_adress": "test", + "Inbox": True, + } + ] + ) + + assert df.equals(expected_df) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_redshift_spectrum.py b/tests/unit/test_redshift_spectrum.py new file mode 100644 index 000000000..393788409 --- /dev/null +++ b/tests/unit/test_redshift_spectrum.py @@ -0,0 +1,62 @@ +import os + +import moto +import pytest +from viadot.utils import skip_test_on_missing_extra + + +try: + import boto3 + from viadot.sources import RedshiftSpectrum +except ImportError: + skip_test_on_missing_extra("RedshiftSpectrum", extra="aws") + + +@pytest.fixture() +def _aws_credentials(): + """Mocked AWS Credentials for moto.""" + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" # noqa: S105 + os.environ["AWS_SECURITY_TOKEN"] = "testing" # noqa: S105 + os.environ["AWS_SESSION_TOKEN"] = "testing" # noqa: S105 + os.environ["AWS_DEFAULT_REGION"] = "us-east-1" + + +@pytest.fixture() +def _mocked_aws(_aws_credentials): + """Mock all AWS interactions. + + Requires you to create your own boto3 clients. + """ + with moto.mock_aws(): + yield + + +@pytest.fixture() +def redshift_spectrum(_mocked_aws): + conn = boto3.client("s3") + conn.create_bucket(Bucket="test_bucket") + spectrum = RedshiftSpectrum(config_key="redshift_dev") + spectrum.create_schema("test_schema") + + yield spectrum + + spectrum.drop_schema("test_schema") + + +@pytest.mark.skip( + reason="To be implemented: https://github.com/dyvenia/viadot/issues/978." 
+) +@pytest.mark.usefixtures("_mocked_aws") +def test_from_df(redshift_spectrum, TEST_DF): + bucket = "test_bucket" + schema = "test_schema" + table = "test_table" + result = redshift_spectrum.from_df( + df=TEST_DF, + to_path=f"s3://{bucket}/{schema}/{table}", + schema=schema, + table=table, + description="A mocked test Redshift Spectrum table.", + ) + assert result is True diff --git a/tests/integration/test_sap_rfc.py b/tests/unit/test_sap_rfc.py similarity index 53% rename from tests/integration/test_sap_rfc.py rename to tests/unit/test_sap_rfc.py index e1278c72e..27ee31cf9 100644 --- a/tests/integration/test_sap_rfc.py +++ b/tests/unit/test_sap_rfc.py @@ -1,18 +1,16 @@ -import pytest from collections import OrderedDict -try: - from viadot.sources import SAPRFC, SAPRFCV2 +from viadot.utils import skip_test_on_missing_extra + - _saprfc_installed = True +try: + from viadot.sources import SAPRFC except ImportError: - _saprfc_installed = False + skip_test_on_missing_extra(source_name="SAPRFC", extra="sap") -if not _saprfc_installed: - pytest.skip("SAPRFC source not installed", allow_module_level=True) +credentials = {"a": "b"} -sap = SAPRFC() -sap2 = SAPRFCV2() +sap = SAPRFC(credentials=credentials) sql1 = "SELECT a AS a_renamed, b FROM table1 WHERE table1.c = 1" sql2 = "SELECT a FROM fake_schema.fake_table WHERE a=1 AND b=2 OR c LIKE 'a%' AND d IN (1, 2) LIMIT 5 OFFSET 3" @@ -113,86 +111,3 @@ def test___build_pandas_filter_query(): sap._build_pandas_filter_query(sap.client_side_filters) == "thirdlongcolname == 01234" ), sap._build_pandas_filter_query(sap.client_side_filters) - - -def test__get_table_name_v2(): - assert sap2._get_table_name(sql1) == "table1" - assert sap2._get_table_name(sql2) == "fake_schema.fake_table", sap2._get_table_name( - sql2 - ) - assert sap2._get_table_name(sql7) == "b" - - -def test__get_columns_v2(): - assert sap2._get_columns(sql1) == ["a", "b"] - assert sap2._get_columns(sql1, aliased=True) == [ - "a_renamed", - "b", - ], sap2._get_columns(sql1, aliased=True) - assert sap2._get_columns(sql2) == ["a"] - assert sap2._get_columns(sql7) == ["a", "b"] - - -def test__get_where_condition_v2(): - assert sap2._get_where_condition(sql1) == "table1.c = 1", sap2._get_where_condition( - sql1 - ) - assert ( - sap2._get_where_condition(sql2) == "a=1 AND b=2 OR c LIKE 'a%' AND d IN (1, 2)" - ), sap2._get_where_condition(sql2) - assert ( - sap2._get_where_condition(sql3) - == "testORword=1 AND testANDword=2 AND testLIMITword=3 AND testOFFSETword=4" - ), sap2._get_where_condition(sql3) - assert ( - sap2._get_where_condition(sql4) - == "testLIMIT = 1 AND testOFFSET = 2 AND LIMITtest=3 AND OFFSETtest=4" - ), sap2._get_where_condition(sql4) - assert ( - sap2._get_where_condition(sql7) - == "c = 1 AND d = 2 AND longcolname = 12345 AND otherlongcolname = 6789" - ), sap2._get_where_condition(sql7) - - -def test__get_limit_v2(): - assert sap2._get_limit(sql1) is None - assert sap2._get_limit(sql2) == 5 - assert sap2._get_limit(sql7) == 5 - - -def test__get_offset_v2(): - assert sap2._get_offset(sql1) is None - assert sap2._get_offset(sql2) == 3 - assert sap2._get_offset(sql7) == 10 - - -def test_client_side_filters_simple_v2(): - _ = sap2._get_where_condition(sql5) - assert sap2.client_side_filters == OrderedDict( - {"AND": "longword123=5"} - ), sap2.client_side_filters - - -def test_client_side_filters_with_limit_offset_v2(): - _ = sap2._get_where_condition(sql6) - assert sap2.client_side_filters == OrderedDict( - {"AND": "otherlongcolname=5"} - ), sap2.client_side_filters - - _ = 
sap2._get_where_condition(sql7) - assert sap2.client_side_filters == OrderedDict( - {"AND": "thirdlongcolname = 01234"} - ), sap2.client_side_filters - - -def test___build_pandas_filter_query_v2(): - _ = sap2._get_where_condition(sql6) - assert ( - sap2._build_pandas_filter_query(sap2.client_side_filters) - == "otherlongcolname == 5" - ), sap2._build_pandas_filter_query(sap2.client_side_filters) - _ = sap2._get_where_condition(sql7) - assert ( - sap2._build_pandas_filter_query(sap2.client_side_filters) - == "thirdlongcolname == 01234" - ), sap2._build_pandas_filter_query(sap2.client_side_filters) diff --git a/tests/unit/test_sap_rfc_2.py b/tests/unit/test_sap_rfc_2.py new file mode 100644 index 000000000..645e3ee8a --- /dev/null +++ b/tests/unit/test_sap_rfc_2.py @@ -0,0 +1,106 @@ +from collections import OrderedDict + +from viadot.utils import skip_test_on_missing_extra + +from .test_sap_rfc import ( + credentials, + sql1, + sql2, + sql3, + sql4, + sql5, + sql6, + sql7, +) + + +try: + from viadot.sources import SAPRFCV2 +except ImportError: + skip_test_on_missing_extra(source_name="SAPRFCV2", extra="sap") + + +sap = SAPRFCV2(credentials=credentials) + + +def test__get_table_name(): + assert sap._get_table_name(sql1) == "table1" + assert sap._get_table_name(sql2) == "fake_schema.fake_table", sap._get_table_name( + sql2 + ) + assert sap._get_table_name(sql7) == "b" + + +def test__get_columns(): + assert sap._get_columns(sql1) == ["a", "b"] + assert sap._get_columns(sql1, aliased=True) == [ + "a_renamed", + "b", + ], sap._get_columns(sql1, aliased=True) + assert sap._get_columns(sql2) == ["a"] + assert sap._get_columns(sql7) == ["a", "b"] + + +def test__get_where_condition(): + assert sap._get_where_condition(sql1) == "table1.c = 1", sap._get_where_condition( + sql1 + ) + assert ( + sap._get_where_condition(sql2) == "a=1 AND b=2 OR c LIKE 'a%' AND d IN (1, 2)" + ), sap._get_where_condition(sql2) + assert ( + sap._get_where_condition(sql3) + == "testORword=1 AND testANDword=2 AND testLIMITword=3 AND testOFFSETword=4" + ), sap._get_where_condition(sql3) + assert ( + sap._get_where_condition(sql4) + == "testLIMIT = 1 AND testOFFSET = 2 AND LIMITtest=3 AND OFFSETtest=4" + ), sap._get_where_condition(sql4) + assert ( + sap._get_where_condition(sql7) + == "c = 1 AND d = 2 AND longcolname = 12345 AND otherlongcolname = 6789" + ), sap._get_where_condition(sql7) + + +def test__get_limit(): + assert sap._get_limit(sql1) is None + assert sap._get_limit(sql2) == 5 + assert sap._get_limit(sql7) == 5 + + +def test__get_offset(): + assert sap._get_offset(sql1) is None + assert sap._get_offset(sql2) == 3 + assert sap._get_offset(sql7) == 10 + + +def test_client_side_filters_simple(): + _ = sap._get_where_condition(sql5) + assert sap.client_side_filters == OrderedDict( + {"AND": "longword123=5"} + ), sap.client_side_filters + + +def test_client_side_filters_with_limit_offset(): + _ = sap._get_where_condition(sql6) + assert sap.client_side_filters == OrderedDict( + {"AND": "otherlongcolname=5"} + ), sap.client_side_filters + + _ = sap._get_where_condition(sql7) + assert sap.client_side_filters == OrderedDict( + {"AND": "thirdlongcolname = 01234"} + ), sap.client_side_filters + + +def test___build_pandas_filter_query(): + _ = sap._get_where_condition(sql6) + assert ( + sap._build_pandas_filter_query(sap.client_side_filters) + == "otherlongcolname == 5" + ), sap._build_pandas_filter_query(sap.client_side_filters) + _ = sap._get_where_condition(sql7) + assert ( + 
sap._build_pandas_filter_query(sap.client_side_filters) + == "thirdlongcolname == 01234" + ), sap._build_pandas_filter_query(sap.client_side_filters) diff --git a/tests/unit/test_sharepoint.py b/tests/unit/test_sharepoint.py index 6de4406d9..177d29fe0 100644 --- a/tests/unit/test_sharepoint.py +++ b/tests/unit/test_sharepoint.py @@ -1,7 +1,14 @@ from pathlib import Path +from unittest.mock import MagicMock, patch import pandas as pd +import pytest +import sharepy +from sharepy.errors import AuthError +from viadot.exceptions import CredentialError from viadot.sources import Sharepoint +from viadot.sources.sharepoint import SharepointCredentials + DUMMY_CREDS = {"site": "test", "username": "test2", "password": "test"} SAMPLE_DF = pd.DataFrame( @@ -16,40 +23,179 @@ class SharepointMock(Sharepoint): - def _download_excel(self, url=None): + def get_connection(self): + return sharepy.session.SharePointSession + + def _download_file_stream(self, url: str | None = None, **kwargs): # noqa: ARG002 + if "nrows" in kwargs: + msg = "Parameter 'nrows' is not supported." + raise ValueError(msg) + return pd.ExcelFile(Path("tests/unit/test_file.xlsx")) -def test_sharepoint_default_na(): - s = SharepointMock(credentials=DUMMY_CREDS) - df = s.to_df(url="test", na_values=Sharepoint.DEFAULT_NA_VALUES) +@pytest.fixture() +def sharepoint_mock(): + return SharepointMock(credentials=DUMMY_CREDS) + + +def test_valid_credentials(): + credentials = { + "site": "tenant.sharepoint.com", + "username": "user@example.com", + "password": "password", + } + shrp_creds = SharepointCredentials(**credentials) + assert shrp_creds.site == credentials["site"] + assert shrp_creds.username == credentials["username"] + assert shrp_creds.password == credentials["password"] + + +def test_invalid_authentication(): + credentials = { + "site": "tenant.sharepoint.com", + "username": "user@example.com", + "password": "password", + } + + s = Sharepoint(credentials=credentials) + + # Patch the sharepy.connect method to simulate an authentication failure + with patch("sharepy.connect") as mock_connect: + mock_connect.side_effect = AuthError("Authentication failed") + + with pytest.raises( + CredentialError, + match="Could not authenticate to tenant.sharepoint.com with provided credentials.", + ): + s.get_connection() + + +def test_missing_username(): + credentials = {"site": "example.sharepoint.com", "password": "password"} + with pytest.raises( + CredentialError, + match="'site', 'username', and 'password' credentials are required.", + ): + SharepointCredentials(**credentials) + + +def test_sharepoint_default_na(sharepoint_mock): + df = sharepoint_mock.to_df( + url="test/file.xlsx", na_values=Sharepoint.DEFAULT_NA_VALUES + ) assert not df.empty assert "NA" not in list(df["col_a"]) -def test_sharepoint_custom_na(): - s = SharepointMock(credentials=DUMMY_CREDS) - df = s.to_df( - url="test", na_values=[v for v in Sharepoint.DEFAULT_NA_VALUES if v != "NA"] +def test_sharepoint_custom_na(sharepoint_mock): + df = sharepoint_mock.to_df( + url="test/file.xlsx", + na_values=[v for v in Sharepoint.DEFAULT_NA_VALUES if v != "NA"], ) assert not df.empty assert "NA" in list(df["col_a"]) -def test_sharepoint_convert_all_to_string_type(): - s = SharepointMock(credentials=DUMMY_CREDS) - converted_df = s._convert_all_to_string_type(df=SAMPLE_DF) +def test__get_file_extension(sharepoint_mock): + url_excel = "https://tenant.sharepoint.com/sites/site/file.xlsx" + url_dir = "https://tenant.sharepoint.com/sites/site/" + url_txt = 
"https://tenant.sharepoint.com/sites/site/file.txt" + + excel_ext = sharepoint_mock._get_file_extension(url=url_excel) + txt_ext = sharepoint_mock._get_file_extension(url=url_txt) + dir_ext = sharepoint_mock._get_file_extension(url=url_dir) + + assert excel_ext == ".xlsx" + assert txt_ext == ".txt" + assert dir_ext == "" + + +def test__is_file(sharepoint_mock): + is_file = sharepoint_mock._is_file(url="https://example.com/file.xlsx") + assert is_file is True + + is_file = sharepoint_mock._is_file(url="https://example.com/dir") + assert is_file is False + + +def test__parse_excel_single_sheet(sharepoint_mock): + excel_file = sharepoint_mock._download_file_stream() + result_df = sharepoint_mock._parse_excel(excel_file, sheet_name="Sheet1") + expected = pd.DataFrame( + { + "col_a": ["val1", "", "val2", "NA", "N/A", "#N/A"], + "col_b": ["val1", "val2", "val3", "val4", "val5", "val6"], + } + ) + + assert result_df["col_b"].equals(expected["col_b"]) + + +def test__parse_excel_string_dtypes(sharepoint_mock): + excel_file = sharepoint_mock._download_file_stream() + result_df = sharepoint_mock._parse_excel(excel_file, sheet_name="Sheet1") + + for column in result_df.columns: + assert result_df[column].dtype == object + + +def test__load_and_parse_not_valid_extension(sharepoint_mock): + with pytest.raises(ValueError): # noqa: PT011 + sharepoint_mock._load_and_parse(file_url="https://example.com/file.txt") + + +def test_scan_sharepoint_folder_valid_url(sharepoint_mock): + url = "https://company.sharepoint.com/sites/site_name/final_folder/" + + # Mock the response from SharePoint + mock_response = MagicMock() + mock_response.json.return_value = { + "d": { + "results": [ + {"Name": "file1.txt"}, + {"Name": "file2.txt"}, + ] + } + } + + # Inject the mock response + sharepoint_mock.get_connection().get = MagicMock(return_value=mock_response) + + expected_files = [ + "https://company.sharepoint.com/sites/site_name/final_folder/file1.txt", + "https://company.sharepoint.com/sites/site_name/final_folder/file2.txt", + ] + + result = sharepoint_mock.scan_sharepoint_folder(url) + assert result == expected_files + + +def test_scan_sharepoint_folder_invalid_url(sharepoint_mock): + url = "https://company.sharepoint.com/folder/sub_folder/final_folder" + + with pytest.raises(ValueError, match="URL does not contain '/sites/' segment."): + sharepoint_mock.scan_sharepoint_folder(url) + + +def test_scan_sharepoint_folder_empty_response(sharepoint_mock): + url = ( + "https://company.sharepoint.com/sites/site_name/folder/sub_folder/final_folder" + ) + + mock_response = MagicMock() + mock_response.json.return_value = {"d": {"results": []}} + + sharepoint_mock.get_connection().get = MagicMock(return_value=mock_response) - assert not converted_df.empty - assert pd.isnull(converted_df["nan_col"]).all() + result = sharepoint_mock.scan_sharepoint_folder(url) + assert result == [] -def test_sharepoint_convert_empty_columns_to_string(): - s = SharepointMock(credentials=DUMMY_CREDS) - converted_df = s._empty_column_to_string(df=SAMPLE_DF) +def test_download_file_stream_unsupported_param(sharepoint_mock): + url = "https://company.sharepoint.com/sites/site_name/folder/test_file.xlsx" - assert not converted_df.empty - assert converted_df["float_col"].dtype == float - assert converted_df["nan_col"].dtype == "string" + with pytest.raises(ValueError, match="Parameter 'nrows' is not supported."): + sharepoint_mock._download_file_stream(url, nrows=10) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 
e3c84438d..c83b7adfe 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,9 +1,9 @@ +from contextlib import nullcontext as does_not_raise import json import logging import pandas as pd - -from viadot.exceptions import ValidationError +import pytest from viadot.utils import ( _cast_df_cols, add_viadot_metadata_columns, @@ -11,15 +11,16 @@ get_fqn, handle_api_request, validate, + validate_and_reorder_dfs_columns, ) def test_single_quotes_inside(): - TEST_VALUE = "a'b" + test_value = "a'b" df1 = pd.DataFrame( { "a": [ - TEST_VALUE, + test_value, ], "b": ["a"], } @@ -27,21 +28,21 @@ def test_single_quotes_inside(): test_insert_query = gen_bulk_insert_query_from_df( df1, table_fqn="test_schema.test_table" ) - TEST_VALUE_ESCAPED = "'a''b'" + test_value_escaped = "'a''b'" assert ( test_insert_query == f"""INSERT INTO test_schema.test_table (a, b) -VALUES ({TEST_VALUE_ESCAPED}, 'a')""" +VALUES ({test_value_escaped}, 'a')""" ), test_insert_query def test_single_quotes_outside(): - TEST_VALUE = "'a'" + test_value = "'a'" df1 = pd.DataFrame( { "a": [ - TEST_VALUE, + test_value, ], "b": ["b"], } @@ -49,21 +50,21 @@ def test_single_quotes_outside(): test_insert_query = gen_bulk_insert_query_from_df( df1, table_fqn="test_schema.test_table" ) - TEST_VALUE_ESCAPED = "'''a'''" + test_value_escaped = "'''a'''" assert ( test_insert_query == f"""INSERT INTO test_schema.test_table (a, b) -VALUES ({TEST_VALUE_ESCAPED}, 'b')""" +VALUES ({test_value_escaped}, 'b')""" ), test_insert_query def test_double_quotes_inside(): - TEST_VALUE = 'a "b"' + test_value = 'a "b"' df1 = pd.DataFrame( { "a": [ - TEST_VALUE, + test_value, ], "b": ["c"], } @@ -71,12 +72,12 @@ def test_double_quotes_inside(): test_insert_query = gen_bulk_insert_query_from_df( df1, table_fqn="test_schema.test_table" ) - TEST_VALUE_ESCAPED = """'a "b"'""" + test_value_escaped = """'a "b"'""" assert ( test_insert_query == f"""INSERT INTO test_schema.test_table (a, b) -VALUES ({TEST_VALUE_ESCAPED}, 'c')""" +VALUES ({test_value_escaped}, 'c')""" ), test_insert_query @@ -108,16 +109,15 @@ class TestingClass: @add_viadot_metadata_columns def to_df(self): my_dict = {"AA": [1, 1], "BB": [2, 2]} - df = pd.DataFrame(my_dict) - return df + return pd.DataFrame(my_dict) testing_instance = TestingClass() df = testing_instance.to_df() assert "_viadot_source" in df.columns -def test___cast_df_cols(): - TEST_DF = pd.DataFrame( +def test__cast_df_cols(): + test_df = pd.DataFrame( { "bool_column": [True, False, True, False], "datetime_column": [ @@ -130,9 +130,11 @@ def test___cast_df_cols(): "object_column": ["apple", "banana", "melon", "orange"], } ) - TEST_DF["datetime_column"] = pd.to_datetime(TEST_DF["datetime_column"]) + test_df["datetime_column"] = pd.to_datetime( + test_df["datetime_column"], format="mixed" + ) result_df = _cast_df_cols( - TEST_DF, types_to_convert=["datetime", "bool", "int", "object"] + test_df, types_to_convert=["datetime", "bool", "int", "object"] ) assert result_df["bool_column"].dtype == pd.Int64Dtype() @@ -154,10 +156,8 @@ def test_get_fqn(): def test_validate_column_size_pass(): df = pd.DataFrame({"col1": ["a", "bb", "ccc"]}) tests = {"column_size": {"col1": 3}} - try: + with does_not_raise(): validate(df, tests) - except ValidationError: - assert False, "Validation failed but was expected to pass" def test_validate_column_size_fail(caplog): @@ -171,10 +171,8 @@ def test_validate_column_size_fail(caplog): def test_validate_column_unique_values_pass(): df = pd.DataFrame({"col1": [1, 2, 3]}) tests = 
{"column_unique_values": ["col1"]} - try: + with does_not_raise(): validate(df, tests) - except ValidationError: - assert False, "Validation failed but was expected to pass" def test_validate_column_unique_values_fail(caplog): @@ -188,10 +186,8 @@ def test_validate_column_unique_values_fail(caplog): def test_validate_column_list_to_match_pass(): df = pd.DataFrame({"col1": [1], "col2": [2]}) tests = {"column_list_to_match": ["col1", "col2"]} - try: + with does_not_raise(): validate(df, tests) - except ValidationError: - assert False, "Validation failed but was expected to pass" def test_validate_column_list_to_match_fail(caplog): @@ -205,10 +201,8 @@ def test_validate_column_list_to_match_fail(caplog): def test_validate_dataset_row_count_pass(): df = pd.DataFrame({"col1": [1, 2, 3]}) tests = {"dataset_row_count": {"min": 1, "max": 5}} - try: + with does_not_raise(): validate(df, tests) - except ValidationError: - assert False, "Validation failed but was expected to pass" def test_validate_dataset_row_count_fail(caplog): @@ -222,10 +216,8 @@ def test_validate_dataset_row_count_fail(caplog): def test_validate_column_match_regex_pass(): df = pd.DataFrame({"col1": ["A12", "B34", "C45"]}) tests = {"column_match_regex": {"col1": "^[A-Z][0-9]{2}$"}} - try: + with does_not_raise(): validate(df, tests) - except ValidationError: - assert "Validation failed but was expected to pass" def test_validate_column_match_regex_fail(caplog): @@ -239,10 +231,8 @@ def test_validate_column_match_regex_fail(caplog): def test_validate_column_sum_pass(): df = pd.DataFrame({"col1": [1, 2, 3]}) tests = {"column_sum": {"col1": {"min": 5, "max": 10}}} - try: + with does_not_raise(): validate(df, tests) - except ValidationError: - assert False, "Validation failed but was expected to pass" def test_validate_column_sum_fail(caplog): @@ -251,3 +241,43 @@ def test_validate_column_sum_fail(caplog): with caplog.at_level(logging.INFO): validate(df, tests) assert "Sum of 10 for col1 is out of the expected range - <5:6>" in caplog.text + + +def test_validate_and_reorder_wrong_columns(): + df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + df2 = pd.DataFrame({"a": [5, 6], "c": [7, 8]}) + + with pytest.raises(ValueError): # noqa: PT011 + validate_and_reorder_dfs_columns([df1, df2]) + + +def test_validate_and_reorder_empty_list(): + with pytest.raises(IndexError): + validate_and_reorder_dfs_columns([]) + + +def test_validate_and_reorder_identical_columns(): + df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + df2 = pd.DataFrame({"a": [5, 6], "b": [7, 8]}) + + result = validate_and_reorder_dfs_columns([df1, df2]) + + assert len(result) == 2 + assert list(result[0].columns) == list(df1.columns) + assert result[0].equals(df1) + assert list(result[1].columns) == list(df2.columns) + assert result[1].equals(df2) + + +def test_validate_and_reorder_different_order_columns(): + df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + df2 = pd.DataFrame({"b": [7, 8], "a": [5, 6]}) + + expected_df2 = pd.DataFrame({"a": [5, 6], "b": [7, 8]}) + result = validate_and_reorder_dfs_columns([df1, df2]) + + assert len(result) == 2 + assert list(result[0].columns) == list(df1.columns) + assert result[0].equals(df1) + assert list(result[1].columns) == list(expected_df2.columns) + assert result[1].equals(expected_df2)