diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 31fead8a7ade65..13c921e953c324 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -809,7 +809,7 @@ jobs: DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }} DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }} ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag }} - ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor] acryl-datahub-actions' + ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5' ACTIONS_CONFIG: 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml' run: | ./smoke-test/run-quickstart.sh diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 1cbc65f2b63700..68432a4feb13dd 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -27,6 +27,7 @@ jobs: - uses: actions/setup-python@v4 with: python-version: "3.10" + cache: pip - name: Install Python dependencies run: ./metadata-ingestion/scripts/install_deps.sh - name: Build Docs diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java index ee6a5ed6f1536f..3c0a9762a28c92 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java @@ -25,6 +25,8 @@ public class RestoreIndices implements Upgrade { public static final String URN_ARG_NAME = "urn"; public static final String URN_LIKE_ARG_NAME = "urnLike"; + public static final String STARTING_OFFSET_ARG_NAME = "startingOffset"; + private final List _steps; public RestoreIndices(final Database server, final EntityService entityService, diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java index ce39b3fb562aff..2ac4fea2e653ac 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java @@ -30,6 +30,8 @@ public class SendMAEStep implements UpgradeStep { private static final int DEFAULT_BATCH_SIZE = 1000; private static final long DEFAULT_BATCH_DELAY_MS = 250; + + private static final int DEFAULT_STARTING_OFFSET = 0; private static final int DEFAULT_THREADS = 1; private final Database _server; @@ -83,6 +85,7 @@ private RestoreIndicesArgs getArgs(UpgradeContext context) { result.batchSize = getBatchSize(context.parsedArgs()); result.numThreads = getThreadCount(context.parsedArgs()); result.batchDelayMs = getBatchDelayMs(context.parsedArgs()); + result.start = getStartingOffset(context.parsedArgs()); if (containsKey(context.parsedArgs(), RestoreIndices.ASPECT_NAME_ARG_NAME)) { result.aspectName = context.parsedArgs().get(RestoreIndices.ASPECT_NAME_ARG_NAME).get(); } @@ -124,7 +127,7 @@ public Function executable() { final int rowCount = getRowCount(args); context.report().addLine(String.format("Found %s latest aspects in aspects table in %.2f minutes.", rowCount, (float) (System.currentTimeMillis() - startTime) / 1000 / 60)); - int start = 0; + int start = args.start; List> futures = 
new ArrayList<>(); startTime = System.currentTimeMillis(); @@ -186,6 +189,10 @@ private int getBatchSize(final Map> parsedArgs) { return getInt(parsedArgs, DEFAULT_BATCH_SIZE, RestoreIndices.BATCH_SIZE_ARG_NAME); } + private int getStartingOffset(final Map> parsedArgs) { + return getInt(parsedArgs, DEFAULT_STARTING_OFFSET, RestoreIndices.STARTING_OFFSET_ARG_NAME); + } + private long getBatchDelayMs(final Map> parsedArgs) { long resolvedBatchDelayMs = DEFAULT_BATCH_DELAY_MS; if (containsKey(parsedArgs, RestoreIndices.BATCH_DELAY_MS_ARG_NAME)) { diff --git a/docker/datahub-ingestion-base/smoke.Dockerfile b/docker/datahub-ingestion-base/smoke.Dockerfile index 276f6dbc4436e2..15dc46ae5b882a 100644 --- a/docker/datahub-ingestion-base/smoke.Dockerfile +++ b/docker/datahub-ingestion-base/smoke.Dockerfile @@ -20,7 +20,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-11-jdk COPY . /datahub-src ARG RELEASE_VERSION RUN cd /datahub-src/metadata-ingestion && \ - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ + sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ cat src/datahub/__init__.py && \ cd ../ && \ ./gradlew :metadata-ingestion:installAll diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index 2ceff6a800ebbb..8b726df5e88420 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -11,8 +11,8 @@ COPY ./metadata-ingestion-modules/airflow-plugin /datahub-ingestion/airflow-plug ARG RELEASE_VERSION WORKDIR /datahub-ingestion -RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ +RUN sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ + sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ cat src/datahub/__init__.py && \ chown -R datahub /datahub-ingestion diff --git a/docker/datahub-ingestion/Dockerfile-slim-only b/docker/datahub-ingestion/Dockerfile-slim-only index 678bee7e306f67..9ae116f839aa07 100644 --- a/docker/datahub-ingestion/Dockerfile-slim-only +++ b/docker/datahub-ingestion/Dockerfile-slim-only @@ -9,7 +9,7 @@ COPY ./metadata-ingestion /datahub-ingestion ARG RELEASE_VERSION WORKDIR /datahub-ingestion -RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ +RUN sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ cat src/datahub/__init__.py && \ chown -R datahub /datahub-ingestion diff --git a/docs-website/build.gradle b/docs-website/build.gradle index 851c10d9ea97f1..370ae3eec91761 100644 --- a/docs-website/build.gradle +++ b/docs-website/build.gradle @@ -119,6 +119,10 @@ task yarnBuild(type: YarnTask, dependsOn: [yarnLint, yarnGenerate, downloadHisto outputs.dir("dist") // tell gradle to apply the build cache outputs.cacheIf { true } + // See https://stackoverflow.com/questions/53230823/fatal-error-ineffective-mark-compacts-near-heap-limit-allocation-failed-java + // and https://github.com/facebook/docusaurus/issues/8329. 
+ // TODO: As suggested in https://github.com/facebook/docusaurus/issues/4765, try switching to swc-loader. + environment = ['NODE_OPTIONS': '--max-old-space-size=10248'] args = ['run', 'build'] } diff --git a/docs-website/sphinx/Makefile b/docs-website/sphinx/Makefile index 00ece7ae253317..c01b45e322c679 100644 --- a/docs-website/sphinx/Makefile +++ b/docs-website/sphinx/Makefile @@ -22,7 +22,7 @@ $(VENV_SENTINEL): requirements.txt $(VENV_DIR)/bin/pip install -r requirements.txt touch $(VENV_SENTINEL) -.PHONY: help html doctest linkcheck clean serve md +.PHONY: help html doctest linkcheck clean clean_all serve md # Not using Python's http.server because it enables caching headers. serve: @@ -35,3 +35,6 @@ md: html # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). html doctest linkcheck clean: venv Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +clean_all: clean + -rm -rf $(VENV_DIR) diff --git a/docs-website/sphinx/requirements.txt b/docs-website/sphinx/requirements.txt index a63fd058532599..94ddd40579f0e7 100644 --- a/docs-website/sphinx/requirements.txt +++ b/docs-website/sphinx/requirements.txt @@ -1,4 +1,4 @@ --e ../../metadata-ingestion[datahub-rest] +-e ../../metadata-ingestion[datahub-rest,sql-parsing] beautifulsoup4==4.11.2 Sphinx==6.1.3 sphinx-click==4.4.0 diff --git a/docs-website/versions.json b/docs-website/versions.json index 0b79ac9498e063..a5493c26a4c659 100644 --- a/docs-website/versions.json +++ b/docs-website/versions.json @@ -1,3 +1,4 @@ [ + "0.11.0", "0.10.5" ] diff --git a/docs-website/yarn.lock b/docs-website/yarn.lock index 209a57a43dab03..0613fe71ef78ee 100644 --- a/docs-website/yarn.lock +++ b/docs-website/yarn.lock @@ -2986,6 +2986,13 @@ dependencies: "@types/node" "*" +"@types/websocket@^1.0.3": + version "1.0.6" + resolved "https://registry.yarnpkg.com/@types/websocket/-/websocket-1.0.6.tgz#ec8dce5915741632ac3a4b1f951b6d4156e32d03" + integrity sha512-JXkliwz93B2cMWOI1ukElQBPN88vMg3CruvW4KVSKpflt3NyNCJImnhIuB/f97rG7kakqRJGFiwkA895Kn02Dg== + dependencies: + "@types/node" "*" + "@types/ws@^8.5.5": version "8.5.5" resolved "https://registry.yarnpkg.com/@types/ws/-/ws-8.5.5.tgz#af587964aa06682702ee6dcbc7be41a80e4b28eb" @@ -7053,7 +7060,6 @@ node-forge@^1: resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-1.3.1.tgz#be8da2af243b2417d5f646a770663a92b7e9ded3" integrity sha512-dPEtOeMvF9VMcYV/1Wb8CPoVAXtp6MKMlcbAt4ddqmGqUJ6fQZFXkNZNkNlfevtNkGtaSoXf/vNNNSvgrdXwtA== - node-gyp-build@^4.3.0: version "4.6.1" resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.6.1.tgz#24b6d075e5e391b8d5539d98c7fc5c210cac8a3e" @@ -9903,6 +9909,10 @@ use-sidecar@^1.1.2: detect-node-es "^1.1.0" tslib "^2.0.0" +use-sync-external-store@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz#7dbefd6ef3fe4e767a0cf5d7287aacfb5846928a" + integrity sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA== utf-8-validate@^5.0.2: version "5.0.10" @@ -9911,12 +9921,6 @@ utf-8-validate@^5.0.2: dependencies: node-gyp-build "^4.3.0" -use-sync-external-store@^1.2.0: - version "1.2.0" - resolved "https://registry.yarnpkg.com/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz#7dbefd6ef3fe4e767a0cf5d7287aacfb5846928a" - integrity sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA== - - util-deprecate@^1.0.1, util-deprecate@^1.0.2, util-deprecate@~1.0.1: version "1.0.2" 
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 7ba516c82cf1b7..9b19291ee246ae 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -5,19 +5,42 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next ### Breaking Changes +- #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. ### Potential Downtime +### Deprecations + +### Other Notable Changes + +## 0.11.0 + +### Breaking Changes + +### Potential Downtime +- #8611 Search improvements requires reindexing indices. A `system-update` job will run which will set indices to read-only and create a backup/clone of each index. During the reindexing new components will be prevented from start-up until the reindex completes. The logs of this job will indicate a % complete per index. Depending on index sizes and infrastructure this process can take 5 minutes to hours however as a rough estimate 1 hour for every 2.3 million entities. + ### Deprecations - #8525: In LDAP ingestor, the `manager_pagination_enabled` changed to general `pagination_enabled` +- MAE Events are no longer produced. MAE events have been deprecated for over a year. ### Other Notable Changes +- In this release we now enable you to create and delete pinned announcements on your DataHub homepage! If you have the “Manage Home Page Posts” platform privilege you’ll see a new section in settings called “Home Page Posts” where you can create and delete text posts and link posts that your users see on the home page. +- The new search and browse experience, which was first made available in the previous release behind a feature flag, is now on by default. Check out our release notes for v0.10.5 to get more information and documentation on this new Browse experience. +- In addition to the ranking changes mentioned above, this release includes changes to the highlighting of search entities to understand why they match your query. You can also sort your results alphabetically or by last updated times, in addition to relevance. In this release, we suggest a correction if your query has a typo in it. - #8300: Clickhouse source now inherited from TwoTierSQLAlchemy. In old way we have platform_instance -> container -> co container db (None) -> container schema and now we have platform_instance -> container database. - #8300: Added `uri_opts` argument; now we can add any options for clickhouse client. - #8659: BigQuery ingestion no longer creates DataPlatformInstance aspects by default. This will only affect users that were depending on this aspect for custom functionality, and can be enabled via the `include_data_platform_instance` config option. +- OpenAPI entity and aspect endpoints expanded to improve developer experience when using this API with additional aspects to be added in the near future. +- The CLI now supports recursive deletes. +- Batching of default aspects on initial ingestion (SQL) +- Improvements to multi-threading. Ingestion recipes, if previously reduced to 1 thread, can be restored to the 15 thread default. 
+- Gradle 7 upgrade moderately improves build speed +- DataHub Ingestion slim images reduced in size by 2GB+ +- Glue Schema Registry fixed ## 0.10.5 diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index d1e6f2f6464914..58a2bc9e670e34 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -110,14 +110,14 @@ task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) { commandLine 'bash', '-x', '-c', "source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml" } -task buildWheel(type: Exec, dependsOn: [install]) { - commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' -} task cleanPythonCache(type: Exec) { commandLine 'bash', '-c', "find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" } +task buildWheel(type: Exec, dependsOn: [install, cleanPythonCache]) { + commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' +} build.dependsOn install check.dependsOn lint diff --git a/metadata-ingestion-modules/airflow-plugin/scripts/release.sh b/metadata-ingestion-modules/airflow-plugin/scripts/release.sh index 7134187a458850..87157479f37d63 100755 --- a/metadata-ingestion-modules/airflow-plugin/scripts/release.sh +++ b/metadata-ingestion-modules/airflow-plugin/scripts/release.sh @@ -13,7 +13,7 @@ MODULE=datahub_airflow_plugin python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' if [[ ${RELEASE_VERSION:-} ]]; then # Replace version with RELEASE_VERSION env variable - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/${MODULE}/__init__.py + sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/${MODULE}/__init__.py else vim src/${MODULE}/__init__.py fi diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py index ce98a0fc1fb609..b2c45d3a1e75d3 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py @@ -1,6 +1,6 @@ # Published at https://pypi.org/project/acryl-datahub/. 
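The version strings switched to `1!0.0.0.dev0` in these release scripts and `__init__.py` files use a PEP 440 epoch. A minimal, purely illustrative check (assuming the `packaging` library is available) of how such versions compare:

```python
# Illustrative only: PEP 440 treats the "1!" prefix as a version epoch, so an in-repo
# dev build now compares as newer than any previously published epoch-0 release.
from packaging.version import Version  # assumes the `packaging` library is installed

assert Version("1!0.0.0.dev0") > Version("0.10.5")  # epoch 1 beats every epoch-0 version
assert Version("0.0.0.dev0") < Version("0.10.5")    # the old dev version sorted below releases
```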
__package_name__ = "acryl-datahub-airflow-plugin" -__version__ = "0.0.0.dev0" +__version__ = "1!0.0.0.dev0" def is_dev_mode() -> bool: diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index 199ccc59c21e04..c20d98cbcbb58b 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -71,7 +71,6 @@ task installDev(type: Exec, dependsOn: [install]) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "${venv_name}/bin/pip install -e .[dev] ${extra_pip_requirements} && " + - "./scripts/install-sqlalchemy-stubs.sh && " + "touch ${sentinel_file}" } @@ -82,7 +81,6 @@ task installAll(type: Exec, dependsOn: [install]) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "${venv_name}/bin/pip install -e .[all] ${extra_pip_requirements} && " + - "./scripts/install-sqlalchemy-stubs.sh && " + "touch ${sentinel_file}" } @@ -119,7 +117,6 @@ task lint(type: Exec, dependsOn: installDev) { task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "./scripts/install-sqlalchemy-stubs.sh && " + "black src/ tests/ examples/ && " + "isort src/ tests/ examples/ && " + "flake8 src/ tests/ examples/ && " + @@ -188,9 +185,6 @@ task specGen(type: Exec, dependsOn: [codegen, installDevTest]) { task docGen(type: Exec, dependsOn: [codegen, installDevTest, specGen]) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && ./scripts/docgen.sh" } -task buildWheel(type: Exec, dependsOn: [install, codegen]) { - commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' -} @@ -198,6 +192,9 @@ task cleanPythonCache(type: Exec) { commandLine 'bash', '-c', "find src tests -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" } +task buildWheel(type: Exec, dependsOn: [install, codegen, cleanPythonCache]) { + commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' +} build.dependsOn install check.dependsOn lint diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index cb06656940918d..f0fa44687a1096 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -909,7 +909,7 @@ in both of the cases domain should be provisioned on DataHub GMS - Add domains, however replace existing domains sent by ingestion source ```yaml transformers: - - type: "pattern_add_dataset_ownership" + - type: "pattern_add_dataset_domain" config: replace_existing: true # false is default behaviour domain_pattern: @@ -920,7 +920,7 @@ in both of the cases domain should be provisioned on DataHub GMS - Add domains, however overwrite the domains available for the dataset on DataHub GMS ```yaml transformers: - - type: "pattern_add_dataset_ownership" + - type: "pattern_add_dataset_domain" config: semantics: OVERWRITE # OVERWRITE is default behaviour domain_pattern: @@ -931,7 +931,7 @@ in both of the cases domain should be provisioned on DataHub GMS - Add domains, however keep the domains available for the dataset on DataHub GMS ```yaml transformers: - - type: "pattern_add_dataset_ownership" + - type: 
"pattern_add_dataset_domain" config: semantics: PATCH domain_pattern: diff --git a/metadata-ingestion/scripts/install-sqlalchemy-stubs.sh b/metadata-ingestion/scripts/install-sqlalchemy-stubs.sh deleted file mode 100755 index 7c14a06464f99e..00000000000000 --- a/metadata-ingestion/scripts/install-sqlalchemy-stubs.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -# ASSUMPTION: This assumes that we're running from inside the venv. - -SQLALCHEMY_VERSION=$(python -c 'import sqlalchemy; print(sqlalchemy.__version__)') - -if [[ $SQLALCHEMY_VERSION == 1.3.* ]]; then - ENSURE_NOT_INSTALLED=sqlalchemy2-stubs - ENSURE_INSTALLED=sqlalchemy-stubs -elif [[ $SQLALCHEMY_VERSION == 1.4.* ]]; then - ENSURE_NOT_INSTALLED=sqlalchemy-stubs - ENSURE_INSTALLED=sqlalchemy2-stubs -else - echo "Unsupported SQLAlchemy version: $SQLALCHEMY_VERSION" - exit 1 -fi - -FORCE_REINSTALL="" -if pip show $ENSURE_NOT_INSTALLED >/dev/null 2>&1 ; then - pip uninstall --yes $ENSURE_NOT_INSTALLED - FORCE_REINSTALL="--force-reinstall" -fi - -if [ -n "$FORCE_REINSTALL" ] || ! pip show $ENSURE_INSTALLED >/dev/null 2>&1 ; then - pip install $FORCE_REINSTALL $ENSURE_INSTALLED -fi diff --git a/metadata-ingestion/scripts/release.sh b/metadata-ingestion/scripts/release.sh index 0a09c4e0307b33..eacaf1d920a8d2 100755 --- a/metadata-ingestion/scripts/release.sh +++ b/metadata-ingestion/scripts/release.sh @@ -11,7 +11,7 @@ fi python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' if [[ ${RELEASE_VERSION:-} ]]; then # Replace version with RELEASE_VERSION env variable - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py + sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py else vim src/datahub/__init__.py fi diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index d8668e89255468..09f71fa769fd37 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -112,7 +112,8 @@ def get_long_description(): sql_common = { # Required for all SQL sources. - "sqlalchemy>=1.3.24, <2", + # This is temporary lower bound that we're open to loosening/tightening as requirements show up + "sqlalchemy>=1.4.39, <2", # Required for SQL profiling. "great-expectations>=0.15.12, <=0.15.50", # scipy version restricted to reduce backtracking, used by great-expectations, @@ -172,13 +173,13 @@ def get_long_description(): } clickhouse_common = { - # Clickhouse 0.1.8 requires SQLAlchemy 1.3.x, while the newer versions - # allow SQLAlchemy 1.4.x. - "clickhouse-sqlalchemy>=0.1.8", + # Clickhouse 0.2.0 adds support for SQLAlchemy 1.4.x + "clickhouse-sqlalchemy>=0.2.0", } redshift_common = { - "sqlalchemy-redshift", + # Clickhouse 0.8.3 adds support for SQLAlchemy 1.4.x + "sqlalchemy-redshift>=0.8.3", "psycopg2-binary", "GeoAlchemy2", *sqllineage_lib, @@ -188,13 +189,8 @@ def get_long_description(): snowflake_common = { # Snowflake plugin utilizes sql common *sql_common, - # Required for all Snowflake sources. - # See https://github.com/snowflakedb/snowflake-sqlalchemy/issues/234 for why 1.2.5 is blocked. - "snowflake-sqlalchemy>=1.2.4, !=1.2.5", - # Because of https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350 we need to restrict SQLAlchemy's max version. 
- # Eventually we should just require snowflake-sqlalchemy>=1.4.3, but I won't do that immediately - # because it may break Airflow users that need SQLAlchemy 1.3.x. - "SQLAlchemy<1.4.42", + # https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350 + "snowflake-sqlalchemy>=1.4.3", # See https://github.com/snowflakedb/snowflake-connector-python/pull/1348 for why 2.8.2 is blocked "snowflake-connector-python!=2.8.2", "pandas", @@ -206,9 +202,7 @@ def get_long_description(): } trino = { - # Trino 0.317 broke compatibility with SQLAlchemy 1.3.24. - # See https://github.com/trinodb/trino-python-client/issues/250. - "trino[sqlalchemy]>=0.308, !=0.317", + "trino[sqlalchemy]>=0.308", } pyhive_common = { @@ -430,6 +424,7 @@ def get_long_description(): "types-Deprecated", "types-protobuf>=4.21.0.1", "types-tzlocal", + "sqlalchemy2-stubs", } diff --git a/metadata-ingestion/src/datahub/__init__.py b/metadata-ingestion/src/datahub/__init__.py index 3ac3efefc14f06..a470de7b500be3 100644 --- a/metadata-ingestion/src/datahub/__init__.py +++ b/metadata-ingestion/src/datahub/__init__.py @@ -3,7 +3,7 @@ # Published at https://pypi.org/project/acryl-datahub/. __package_name__ = "acryl-datahub" -__version__ = "0.0.0.dev0" +__version__ = "1!0.0.0.dev0" def is_dev_mode() -> bool: diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 42c0ea1601c74d..5931bf89b010b5 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -282,12 +282,14 @@ def deploy( "urn": urn, "name": name, "type": pipeline_config["source"]["type"], - "schedule": {"interval": schedule, "timezone": time_zone}, "recipe": json.dumps(pipeline_config), "executorId": executor_id, "version": cli_version, } + if schedule is not None: + variables["schedule"] = {"interval": schedule, "timezone": time_zone} + if urn: if not datahub_graph.exists(urn): logger.error(f"Could not find recipe for provided urn: {urn}") @@ -331,6 +333,7 @@ def deploy( $version: String) { createIngestionSource(input: { + name: $name, type: $type, schedule: $schedule, config: { diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py index a5aadbd6e246b4..96184d8d445e4e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py @@ -69,11 +69,7 @@ def get_aspects( return for i, row in enumerate(rows): - # TODO: Replace with namedtuple usage once we drop sqlalchemy 1.3 - if hasattr(row, "_asdict"): - row_dict = row._asdict() - else: - row_dict = dict(row) + row_dict = row._asdict() mcp = self._parse_row(row_dict) if mcp: yield mcp, row_dict["createdon"] diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 40b90d216348c7..89b1e45695c578 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -7,10 +7,26 @@ from dataclasses import dataclass, field as dataclasses_field from enum import Enum from functools import lru_cache -from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple, Union +from typing import ( + TYPE_CHECKING, + Dict, + Iterable, + List, + Optional, + 
Sequence, + Set, + Tuple, + Union, + cast, +) from looker_sdk.error import SDKError -from looker_sdk.sdk.api40.models import LookmlModelExploreField, User, WriteQuery +from looker_sdk.sdk.api40.models import ( + LookmlModelExplore, + LookmlModelExploreField, + User, + WriteQuery, +) from pydantic.class_validators import validator import datahub.emitter.mce_builder as builder @@ -23,6 +39,7 @@ LookerCommonConfig, LookerDashboardSourceConfig, NamingPatternMapping, + ViewNamingPatternMapping, ) from datahub.ingestion.source.looker.looker_constant import IMPORTED_PROJECTS from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI @@ -93,14 +110,16 @@ class LookerViewId: project_name: str model_name: str view_name: str + file_path: str - def get_mapping(self, config: LookerCommonConfig) -> NamingPatternMapping: - return NamingPatternMapping( + def get_mapping(self, config: LookerCommonConfig) -> ViewNamingPatternMapping: + return ViewNamingPatternMapping( platform=config.platform_name, env=config.env.lower(), project=self.project_name, model=self.model_name, name=self.view_name, + file_path=self.file_path, ) @validator("view_name") @@ -109,10 +128,35 @@ def remove_quotes(cls, v): v = v.replace('"', "").replace("`", "") return v + def preprocess_file_path(self, file_path: str) -> str: + new_file_path: str = str(file_path) + + str_to_remove: List[str] = [ + "\\.view\\.lkml$", # escape the . using \ + ] + + for pattern in str_to_remove: + new_file_path = re.sub(pattern, "", new_file_path) + + str_to_replace: Dict[str, str] = { + f"^imported_projects/{re.escape(self.project_name)}/": "", # escape any special regex character present in project-name + "/": ".", # / is not urn friendly + } + + for pattern in str_to_replace: + new_file_path = re.sub(pattern, str_to_replace[pattern], new_file_path) + + logger.debug(f"Original file path {file_path}") + logger.debug(f"After preprocessing file path {new_file_path}") + + return new_file_path + def get_urn(self, config: LookerCommonConfig) -> str: - dataset_name = config.view_naming_pattern.replace_variables( - self.get_mapping(config) - ) + n_mapping: ViewNamingPatternMapping = self.get_mapping(config) + + n_mapping.file_path = self.preprocess_file_path(n_mapping.file_path) + + dataset_name = config.view_naming_pattern.replace_variables(n_mapping) return builder.make_dataset_urn_with_platform_instance( platform=config.platform_name, @@ -135,6 +179,10 @@ class ViewFieldType(Enum): UNKNOWN = "Unknown" +class ViewFieldValue(Enum): + NOT_AVAILABLE = "NotAvailable" + + @dataclass class ViewField: name: str @@ -161,6 +209,69 @@ def create_view_project_map(view_fields: List[ViewField]) -> Dict[str, str]: return view_project_map +def get_view_file_path( + lkml_fields: List[LookmlModelExploreField], view_name: str +) -> Optional[str]: + """ + Search for the view file path on field, if found then return the file path + """ + logger.debug("Entered") + + for field in lkml_fields: + if field.view == view_name: + # This path is relative to git clone directory + logger.debug(f"Found view({view_name}) file-path {field.source_file}") + return field.source_file + + logger.debug(f"Failed to find view({view_name}) file-path") + + return None + + +def create_upstream_views_file_path_map( + view_names: Set[str], lkml_fields: List[LookmlModelExploreField] +) -> Dict[str, Optional[str]]: + """ + Create a map of view-name v/s view file path, so that later we can fetch view's file path via view-name + """ + + upstream_views_file_path: Dict[str, Optional[str]] = {} + + 
for view_name in view_names: + file_path: Optional[str] = get_view_file_path( + lkml_fields=lkml_fields, view_name=view_name + ) + + upstream_views_file_path[view_name] = file_path + + return upstream_views_file_path + + +def explore_field_set_to_lkml_fields( + explore: LookmlModelExplore, +) -> List[LookmlModelExploreField]: + """ + explore.fields has three variables i.e. dimensions, measures, parameters of same type i.e. LookmlModelExploreField. + This method creating a list by adding all field instance to lkml_fields + """ + lkml_fields: List[LookmlModelExploreField] = [] + + if explore.fields is None: + logger.debug(f"Explore({explore.name}) doesn't have any field") + return lkml_fields + + def empty_list( + fields: Optional[Sequence[LookmlModelExploreField]], + ) -> List[LookmlModelExploreField]: + return list(fields) if fields is not None else [] + + lkml_fields.extend(empty_list(explore.fields.dimensions)) + lkml_fields.extend(empty_list(explore.fields.measures)) + lkml_fields.extend(empty_list(explore.fields.parameters)) + + return lkml_fields + + class LookerUtil: field_type_mapping = { **POSTGRES_TYPES_MAP, @@ -457,6 +568,9 @@ class LookerExplore: upstream_views: Optional[ List[ProjectInclude] ] = None # captures the view name(s) this explore is derived from + upstream_views_file_path: Dict[str, Optional[str]] = dataclasses_field( + default_factory=dict + ) # view_name is key and file_path is value. A single file may contains multiple views joins: Optional[List[str]] = None fields: Optional[List[ViewField]] = None # the fields exposed in this explore source_file: Optional[str] = None @@ -558,6 +672,9 @@ def from_dict( description=dict.get("description"), upstream_views=upstream_views, joins=joins, + # This method is getting called from lookml_source's get_internal_workunits method + # & upstream_views_file_path is not in use in that code flow + upstream_views_file_path={}, ) @classmethod # noqa: C901 @@ -575,6 +692,10 @@ def from_api( # noqa: C901 explore = client.lookml_model_explore(model, explore_name) views: Set[str] = set() + lkml_fields: List[ + LookmlModelExploreField + ] = explore_field_set_to_lkml_fields(explore) + if explore.view_name is not None and explore.view_name != explore.name: # explore is not named after a view and is instead using a from field, which is modeled as view_name. aliased_explore = True @@ -685,6 +806,15 @@ def from_api( # noqa: C901 if view_project_map: logger.debug(f"views and their projects: {view_project_map}") + upstream_views_file_path: Dict[ + str, Optional[str] + ] = create_upstream_views_file_path_map( + lkml_fields=lkml_fields, + view_names=views, + ) + if upstream_views_file_path: + logger.debug(f"views and their file-paths: {upstream_views_file_path}") + return cls( name=explore_name, model_name=model, @@ -699,6 +829,7 @@ def from_api( # noqa: C901 ) for view_name in views ), + upstream_views_file_path=upstream_views_file_path, source_file=explore.source_file, ) except SDKError as e: @@ -791,12 +922,20 @@ def _to_metadata_events( # noqa: C901 upstreams = [] observed_lineage_ts = datetime.datetime.now(tz=datetime.timezone.utc) for view_ref in sorted(self.upstream_views): + # set file_path to ViewFieldType.UNKNOWN if file_path is not available to keep backward compatibility + # if we raise error on file_path equal to None then existing test-cases will fail as mock data doesn't have required attributes. 
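Because `file_path` is now part of the view naming mapping, users can include it in `view_naming_pattern` to keep same-named views from different `.view.lkml` files on distinct URNs; the new `test_file_path_in_view_naming_pattern` test further below exercises exactly this. A hedged recipe sketch (all paths and credentials are placeholders, not from this PR):

```python
# Sketch of a lookml recipe using the new {file_path} naming variable. Only
# view_naming_pattern reflects the new behavior; everything else is a placeholder.
recipe = {
    "source": {
        "type": "lookml",
        "config": {
            "base_folder": "/path/to/lookml/checkout",  # placeholder
            "api": {  # placeholder Looker API credentials
                "base_url": "https://looker.company.com",
                "client_id": "CLIENT_ID",
                "client_secret": "CLIENT_SECRET",
            },
            "view_naming_pattern": "{project}.{file_path}.view.{name}",
        },
    },
    "sink": {"type": "console"},
}
# Run it the same way the integration tests below do:
# from datahub.ingestion.run.pipeline import Pipeline
# Pipeline.create(recipe).run()
```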
+ file_path: str = ( + cast(str, self.upstream_views_file_path[view_ref.include]) + if self.upstream_views_file_path[view_ref.include] is not None + else ViewFieldValue.NOT_AVAILABLE.value + ) view_urn = LookerViewId( project_name=view_ref.project if view_ref.project != _BASE_PROJECT_NAME else self.project_name, model_name=self.model_name, view_name=view_ref.include, + file_path=file_path, ).get_urn(config) upstreams.append( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 455614c758bb93..96c405f7257d04 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -83,10 +83,21 @@ class NamingPatternMapping: name: str +@dataclasses.dataclass +class ViewNamingPatternMapping(NamingPatternMapping): + file_path: str + + class LookerNamingPattern(NamingPattern): ALLOWED_VARS = [field.name for field in dataclasses.fields(NamingPatternMapping)] +class LookerViewNamingPattern(NamingPattern): + ALLOWED_VARS = [ + field.name for field in dataclasses.fields(ViewNamingPatternMapping) + ] + + class LookerCommonConfig(DatasetSourceConfigMixin): explore_naming_pattern: LookerNamingPattern = pydantic.Field( description=f"Pattern for providing dataset names to explores. {LookerNamingPattern.allowed_docstring()}", @@ -96,13 +107,13 @@ class LookerCommonConfig(DatasetSourceConfigMixin): description=f"Pattern for providing browse paths to explores. {LookerNamingPattern.allowed_docstring()}", default=LookerNamingPattern(pattern="/{env}/{platform}/{project}/explores"), ) - view_naming_pattern: LookerNamingPattern = Field( - LookerNamingPattern(pattern="{project}.view.{name}"), - description=f"Pattern for providing dataset names to views. {LookerNamingPattern.allowed_docstring()}", + view_naming_pattern: LookerViewNamingPattern = Field( + LookerViewNamingPattern(pattern="{project}.view.{name}"), + description=f"Pattern for providing dataset names to views. {LookerViewNamingPattern.allowed_docstring()}", ) - view_browse_pattern: LookerNamingPattern = Field( - LookerNamingPattern(pattern="/{env}/{platform}/{project}/views"), - description=f"Pattern for providing browse paths to views. {LookerNamingPattern.allowed_docstring()}", + view_browse_pattern: LookerViewNamingPattern = Field( + LookerViewNamingPattern(pattern="/{env}/{platform}/{project}/views"), + description=f"Pattern for providing browse paths to views. 
{LookerViewNamingPattern.allowed_docstring()}", ) tag_measures_and_dimensions: bool = Field( True, diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 1a32afa2b7fdd6..e69c3b6e601bd3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -58,6 +58,7 @@ ProjectInclude, ViewField, ViewFieldType, + ViewFieldValue, ) from datahub.ingestion.source.looker.looker_lib_wrapper import ( LookerAPI, @@ -1065,10 +1066,30 @@ def _get_fields( fields.append(field) return fields + @classmethod + def determine_view_file_path( + cls, base_folder_path: str, absolute_file_path: str + ) -> str: + + splits: List[str] = absolute_file_path.split(base_folder_path, 1) + if len(splits) != 2: + logger.debug( + f"base_folder_path({base_folder_path}) and absolute_file_path({absolute_file_path}) not matching" + ) + return ViewFieldValue.NOT_AVAILABLE.value + + file_path: str = splits[1] + logger.debug(f"file_path={file_path}") + + return file_path.strip( + "/" + ) # strip / from path to make it equivalent to source_file attribute of LookerModelExplore API + @classmethod def from_looker_dict( cls, project_name: str, + base_folder_path: str, model_name: str, looker_view: dict, connection: LookerConnectionDefinition, @@ -1083,6 +1104,7 @@ def from_looker_dict( populate_sql_logic_in_descriptions: bool = False, process_isolation_for_sql_parsing: bool = False, ) -> Optional["LookerView"]: + view_name = looker_view["name"] logger.debug(f"Handling view {view_name} in model {model_name}") # The sql_table_name might be defined in another view and this view is extending that view, @@ -1206,9 +1228,16 @@ def from_looker_dict( viewLanguage=VIEW_LANGUAGE_LOOKML, ) + file_path = LookerView.determine_view_file_path( + base_folder_path, looker_viewfile.absolute_file_path + ) + return LookerView( id=LookerViewId( - project_name=project_name, model_name=model_name, view_name=view_name + project_name=project_name, + model_name=model_name, + view_name=view_name, + file_path=file_path, ), absolute_file_path=looker_viewfile.absolute_file_path, connection=connection, @@ -1544,6 +1573,7 @@ def _construct_datalineage_urn( project_name=looker_view.id.project_name, model_name=looker_view.id.model_name, view_name=sql_table_name, + file_path=looker_view.id.file_path, ) return view_id.get_urn(self.source_config) @@ -2057,6 +2087,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 ) if looker_viewfile is not None: + for raw_view in looker_viewfile.views: raw_view_name = raw_view["name"] if LookerRefinementResolver.is_refinement(raw_view_name): @@ -2077,22 +2108,36 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 raw_view = looker_refinement_resolver.apply_view_refinement( raw_view=raw_view, ) - maybe_looker_view = LookerView.from_looker_dict( + + current_project_name: str = ( include.project if include.project != _BASE_PROJECT_NAME - else project_name, - model_name, - raw_view, - connectionDefinition, - looker_viewfile, - viewfile_loader, - looker_refinement_resolver, - self.reporter, - self.source_config.max_file_snippet_length, - self.source_config.parse_table_names_from_sql, - self.source_config.sql_parser, - self.source_config.extract_column_level_lineage, - self.source_config.populate_sql_logic_for_missing_descriptions, + else project_name + ) + + # if project is base 
project then it is available as self.base_projects_folder[_BASE_PROJECT_NAME] + base_folder_path: str = str( + self.base_projects_folder.get( + current_project_name, + self.base_projects_folder[_BASE_PROJECT_NAME], + ) + ) + + maybe_looker_view = LookerView.from_looker_dict( + project_name=current_project_name, + base_folder_path=base_folder_path, + model_name=model_name, + looker_view=raw_view, + connection=connectionDefinition, + looker_viewfile=looker_viewfile, + looker_viewfile_loader=viewfile_loader, + looker_refinement_resolver=looker_refinement_resolver, + reporter=self.reporter, + max_file_snippet_length=self.source_config.max_file_snippet_length, + parse_table_names_from_sql=self.source_config.parse_table_names_from_sql, + sql_parser_path=self.source_config.sql_parser, + extract_col_level_lineage=self.source_config.extract_column_level_lineage, + populate_sql_logic_in_descriptions=self.source_config.populate_sql_logic_for_missing_descriptions, process_isolation_for_sql_parsing=self.source_config.process_isolation_for_sql_parsing, ) except Exception as e: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index d041d219c4bdd7..1cbd4a3b3ea244 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -451,17 +451,10 @@ def _get_operation_aspect_work_unit( yield wu def _process_snowflake_history_row( - self, row: Any + self, event_dict: dict ) -> Iterable[SnowflakeJoinedAccessEvent]: try: # big hammer try block to ensure we don't fail on parsing events self.report.rows_processed += 1 - # Make some minor type conversions. - if hasattr(row, "_asdict"): - # Compat with SQLAlchemy 1.3 and 1.4 - # See https://docs.sqlalchemy.org/en/14/changelog/migration_14.html#rowproxy-is-no-longer-a-proxy-is-now-called-row-and-behaves-like-an-enhanced-named-tuple. - event_dict = row._asdict() - else: - event_dict = dict(row) # no use processing events that don't have a query text if not event_dict["QUERY_TEXT"]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index 20130ef21e5e6b..1626f86b92545c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -38,7 +38,6 @@ logger, register_custom_type, ) -from datahub.ingestion.source.sql.sql_config import make_sqlalchemy_uri from datahub.ingestion.source.sql.two_tier_sql_source import ( TwoTierSQLAlchemyConfig, TwoTierSQLAlchemySource, @@ -147,7 +146,6 @@ class ClickHouseConfig( include_materialized_views: Optional[bool] = Field(default=True, description="") def get_sql_alchemy_url(self, current_db=None): - url = make_url( super().get_sql_alchemy_url(uri_opts=self.uri_opts, current_db=current_db) ) @@ -158,42 +156,11 @@ def get_sql_alchemy_url(self, current_db=None): ) # We can setup clickhouse ingestion in sqlalchemy_uri form and config form. - - # If we use sqlalchemu_uri form then super().get_sql_alchemy_url doesn't - # update current_db because it return self.sqlalchemy_uri without any update. - # This code bellow needed for rewriting sqlalchemi_uri and replace database with current_db.from - # For the future without python3.7 and sqlalchemy 1.3 support we can use code - # url=url.set(db=current_db), but not now. 
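The removed ClickHouse workaround ends with a comment anticipating `url.set(...)`; the replacement line in the next hunk uses exactly that SQLAlchemy 1.4 immutable-URL API. A minimal demonstration with an illustrative DSN (not from the PR):

```python
# URL objects are immutable in SQLAlchemy 1.4: URL.set() returns a copy with the given
# fields swapped out, replacing the manual URI re-assembly removed above.
from sqlalchemy.engine import make_url

url = make_url("clickhouse+native://user:secret@localhost:9000/default")  # illustrative DSN
assert str(url.set(database="analytics")).endswith("@localhost:9000/analytics")
```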
- # Why we need to update database in uri at all? # Because we get database from sqlalchemy inspector and inspector we form from url inherited from # TwoTierSQLAlchemySource and SQLAlchemySource - if self.sqlalchemy_uri and current_db: - self.scheme = url.drivername - self.username = url.username - self.password = ( - pydantic.SecretStr(str(url.password)) - if url.password - else pydantic.SecretStr("") - ) - if url.host and url.port: - self.host_port = url.host + ":" + str(url.port) - elif url.host: - self.host_port = url.host - # untill released https://github.com/python/mypy/pull/15174 - self.uri_opts = {str(k): str(v) for (k, v) in url.query.items()} - - url = make_url( - make_sqlalchemy_uri( - self.scheme, - self.username, - self.password.get_secret_value() if self.password else None, - self.host_port, - current_db if current_db else self.database, - uri_opts=self.uri_opts, - ) - ) + url = url.set(database=current_db) return str(url) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 3c7701d93edebc..685d4fb3074c92 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -46,12 +46,17 @@ BasicSQLAlchemyConfig, make_sqlalchemy_uri, ) -from datahub.metadata.schema_classes import BooleanTypeClass, UnionTypeClass +from datahub.metadata.schema_classes import ( + BooleanTypeClass, + StringTypeClass, + UnionTypeClass, +) logger: logging.Logger = logging.getLogger(__name__) register_custom_type(sqlalchemy.dialects.mssql.BIT, BooleanTypeClass) register_custom_type(sqlalchemy.dialects.mssql.SQL_VARIANT, UnionTypeClass) +register_custom_type(sqlalchemy.dialects.mssql.UNIQUEIDENTIFIER, StringTypeClass) class SQLServerConfig(BasicSQLAlchemyConfig): diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py index 855958f0755e1e..f659ea0c1c5c0e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py @@ -143,11 +143,7 @@ def _get_clickhouse_history(self): results = engine.execute(query) events = [] for row in results: - # minor type conversion - if hasattr(row, "_asdict"): - event_dict = row._asdict() - else: - event_dict = dict(row) + event_dict = row._asdict() # stripping extra spaces caused by above _asdict() conversion for k, v in event_dict.items(): diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py index 99a980b326e531..691eaa8211054c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py @@ -298,9 +298,7 @@ def _gen_access_events_from_history_query( for row in results: if not self._should_process_row(row): continue - if hasattr(row, "_asdict"): - # Compatibility with sqlalchemy 1.4.x. - row = row._asdict() + row = row._asdict() access_event = RedshiftAccessEvent(**dict(row.items())) # Replace database name with the alias name if one is provided in the config. 
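The usage sources above and below get the same simplification: the `hasattr(row, "_asdict")` fallbacks could be dropped because SQLAlchemy 1.4 result rows are named-tuple-like and always expose `._asdict()`. A quick illustrative sketch against an in-memory SQLite database (not part of the PR):

```python
# Under SQLAlchemy 1.4, every Row from a textual result supports ._asdict(), so the
# dict(row) compatibility branch needed for 1.3 is no longer required.
from sqlalchemy import create_engine, text

engine = create_engine("sqlite://")  # throwaway in-memory database
with engine.connect() as conn:
    row = conn.execute(text("SELECT 1 AS id, 'foo' AS name")).first()
    assert row._asdict() == {"id": 1, "name": "foo"}
```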
if self.config.database_alias: diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py index 9394a8bba5e0b6..c38800b3a69838 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py @@ -162,11 +162,7 @@ def _get_trino_history(self): results = engine.execute(query) events = [] for row in results: - # minor type conversion - if hasattr(row, "_asdict"): - event_dict = row._asdict() - else: - event_dict = dict(row) + event_dict = row._asdict() # stripping extra spaces caused by above _asdict() conversion for k, v in event_dict.items(): diff --git a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json new file mode 100644 index 00000000000000..b0f66e7b245c96 --- /dev/null +++ b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json @@ -0,0 +1,499 @@ +[ +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { + "urn": "urn:li:dashboard:(looker,dashboards.1)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { + "customProperties": {}, + "title": "foo", + "description": "lorem ipsum", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 1586847600000, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "https://looker.company.com/dashboards/1" + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "changeType": "UPSERT", + "aspectName": "embed", + "aspect": { + "json": { + "renderUrl": "https://looker.company.com/embed/dashboards/1" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)", + "schemaField": { + "fieldPath": "calc", + "nullable": false, + "description": "", + "label": "foobar", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)", + "schemaField": { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + 
"entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)", + "schemaField": { + "fieldPath": "calc", + "nullable": false, + "description": "", + "label": "foobar", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)", + "schemaField": { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/looker/looker_hub/explores" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.explore.label": "My Explore View", + "looker.explore.file": "test_source_file.lkml" + }, + "externalUrl": "https://looker.company.com/explore/data/my_view", + "name": "My Explore View", + "description": "lorem ipsum", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,datahub-demo.views.datahub-demo.datasets.faa_flights.view.faa_flights,PROD)", + "type": "VIEW" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "my_view", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Explore" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "embed", + "aspect": { + "json": { + "renderUrl": "https://looker.company.com/embed/explore/data/my_view" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "looker_hub" + }, + { + "id": "explores" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Dimension", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Dimension", + "description": "A tag that is applied to all dimension fields." + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Temporal", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Temporal", + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Measure", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Measure", + "description": "A tag that is applied to all measures (metrics). 
Measures are typically the columns that you aggregate on" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Dimension", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Measure", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Temporal", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index 9dc15fae3a23ba..ee6610cf75679c 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -473,9 +473,23 @@ def setup_mock_explore_unaliased_with_joins(mocked_client): ) -def setup_mock_explore(mocked_client): +def setup_mock_explore( + mocked_client: Any, additional_lkml_fields: List[LookmlModelExploreField] = [] +) -> None: mock_model = mock.MagicMock(project_name="lkml_samples") mocked_client.lookml_model.return_value = mock_model + + lkml_fields: List[LookmlModelExploreField] = [ + LookmlModelExploreField( + name="dim1", + type="string", + dimension_group=None, + description="dimension one description", + label_short="Dimensions One Label", + ) + ] + lkml_fields.extend(additional_lkml_fields) + mocked_client.lookml_model_explore.return_value = LookmlModelExplore( id="1", name="my_explore_name", @@ -484,15 +498,7 @@ def setup_mock_explore(mocked_client): view_name="underlying_view", project_name="lkml_samples", fields=LookmlModelExploreFieldset( - dimensions=[ - LookmlModelExploreField( - name="dim1", - type="string", - dimension_group=None, - description="dimension one description", - label_short="Dimensions One Label", - ) - ] + dimensions=lkml_fields, ), source_file="test_source_file.lkml", ) @@ -905,6 +911,55 @@ def test_independent_looks_ingest( ) +@freeze_time(FROZEN_TIME) +def test_file_path_in_view_naming_pattern( + pytestconfig, tmp_path, mock_time, mock_datahub_graph +): + mocked_client = mock.MagicMock() + new_recipe = get_default_recipe(output_file_path=f"{tmp_path}/looker_mces.json") + new_recipe["source"]["config"][ + "view_naming_pattern" + ] = "{project}.{file_path}.view.{name}" + + with mock.patch( + "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", + mock_datahub_graph, + ) as mock_checkpoint, mock.patch("looker_sdk.init40") as mock_sdk: + mock_checkpoint.return_value = mock_datahub_graph + + 
mock_sdk.return_value = mocked_client + setup_mock_dashboard(mocked_client) + setup_mock_explore( + mocked_client, + additional_lkml_fields=[ + LookmlModelExploreField( + name="dim2", + type="string", + dimension_group=None, + description="dimension one description", + label_short="Dimensions One Label", + view="underlying_view", + source_file="views/underlying_view.view.lkml", + ) + ], + ) + setup_mock_look(mocked_client) + setup_mock_external_project_view_explore(mocked_client) + + test_resources_dir = pytestconfig.rootpath / "tests/integration/looker" + + pipeline = Pipeline.create(new_recipe) + pipeline.run() + pipeline.raise_from_status() + mce_out_file = "golden_test_file_path_ingest.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "looker_mces.json", + golden_path=f"{test_resources_dir}/{mce_out_file}", + ) + + @freeze_time(FROZEN_TIME) def test_independent_soft_deleted_looks( pytestconfig, diff --git a/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json b/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json index dc5e1aa9096f84..ce4123fb7e93d6 100644 --- a/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json +++ b/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json @@ -2,12 +2,12 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.foo.view.my_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/foo.view.lkml/views" ] } }, @@ -176,12 +176,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.foo.view.my_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -193,12 +194,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.foo.view.my_view,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -210,12 +212,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.foo.view.my_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -227,6 +230,9 @@ { "id": "lkml_samples" }, + { + "id": "foo.view.lkml" + }, { "id": "views" } @@ -235,18 +241,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.bar.view.my_derived_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/bar.view.lkml/views" ] } }, @@ -263,7 +270,7 @@ "time": 1586847600000, "actor": "urn:li:corpuser:datahub" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.bar.view.my_view,PROD)", "type": "VIEW" } ] @@ -394,12 +401,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.bar.view.my_derived_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -411,12 +419,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.bar.view.my_derived_view,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -428,12 +437,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.bar.view.my_derived_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -445,6 +455,9 @@ { "id": "lkml_samples" }, + { + "id": "bar.view.lkml" + }, { "id": "views" } @@ -453,18 +466,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.included_view_file.view.include_able_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/included_view_file.view.lkml/views" ] } }, @@ -501,12 +515,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.included_view_file.view.include_able_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -518,12 +533,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.included_view_file.view.include_able_view,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -535,12 +551,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.included_view_file.view.include_able_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -552,6 +569,9 @@ { "id": "lkml_samples" }, + { + "id": "included_view_file.view.lkml" + }, { "id": "views" } @@ -560,18 +580,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.looker_events,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/view_declarations.view.lkml/views" ] } }, @@ -608,12 +629,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.looker_events,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -625,12 +647,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.looker_events,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -642,12 +665,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.looker_events,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -659,6 +683,9 @@ { "id": "lkml_samples" }, + { + "id": "view_declarations.view.lkml" + }, { "id": "views" } @@ -667,18 +694,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.extending_looker_events,PROD)", "aspects": 
[ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/view_declarations.view.lkml/views" ] } }, @@ -760,12 +788,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.extending_looker_events,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -777,12 +806,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.extending_looker_events,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -794,12 +824,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.extending_looker_events,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -811,6 +842,9 @@ { "id": "lkml_samples" }, + { + "id": "view_declarations.view.lkml" + }, { "id": "views" } @@ -819,18 +853,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.autodetect_sql_name_based_on_view_name,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/view_declarations.view.lkml/views" ] } }, @@ -867,12 +902,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -884,12 +920,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -901,12 +938,13 @@ }, 
"systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -918,6 +956,9 @@ { "id": "lkml_samples" }, + { + "id": "view_declarations.view.lkml" + }, { "id": "views" } @@ -926,18 +967,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.test_include_external_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/view_declarations.view.lkml/views" ] } }, @@ -974,12 +1016,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.test_include_external_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -991,12 +1034,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.test_include_external_view,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1008,12 +1052,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.test_include_external_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1025,6 +1070,9 @@ { "id": "lkml_samples" }, + { + "id": "view_declarations.view.lkml" + }, { "id": "views" } @@ -1033,18 +1081,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.nested.fragment_derived.view.fragment_derived_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + 
"/prod/looker/lkml_samples/nested/fragment_derived.view.lkml/views" ] } }, @@ -1156,12 +1205,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.nested.fragment_derived.view.fragment_derived_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1173,12 +1223,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.nested.fragment_derived.view.fragment_derived_view,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1190,12 +1241,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.nested.fragment_derived.view.fragment_derived_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1207,6 +1259,12 @@ { "id": "lkml_samples" }, + { + "id": "nested" + }, + { + "id": "fragment_derived.view.lkml" + }, { "id": "views" } @@ -1215,18 +1273,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.liquid.view.customer_facts,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/liquid.view.lkml/views" ] } }, @@ -1263,12 +1322,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.liquid.view.customer_facts,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1280,12 +1340,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.liquid.view.customer_facts,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1297,12 +1358,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.liquid.view.customer_facts,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1314,6 +1376,9 @@ { "id": "lkml_samples" }, + { + "id": "liquid.view.lkml" + }, { "id": "views" } @@ -1322,18 +1387,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/ability.view.lkml/views" ] } }, @@ -1362,7 +1428,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),pk)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD),pk)" ], "confidenceScore": 1.0 } @@ -1449,12 +1515,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1466,12 +1533,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1483,12 +1551,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1500,6 +1569,9 @@ { "id": "lkml_samples" }, + { + "id": "ability.view.lkml" + }, { "id": "views" } @@ -1508,18 +1580,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/owners.view.lkml/views" ] } }, @@ -1548,7 +1621,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD),id)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD),id)" ], "confidenceScore": 1.0 }, @@ -1559,7 +1632,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD),owner_name)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD),owner_name)" ], "confidenceScore": 1.0 }, @@ -1570,7 +1643,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD),has_owner_name)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD),has_owner_name)" ], "confidenceScore": 1.0 } @@ -1680,12 +1753,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1697,12 +1771,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1714,12 +1789,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1731,6 +1807,9 @@ { "id": "lkml_samples" }, + { + "id": "owners.view.lkml" + }, { "id": "views" } @@ -1739,18 +1818,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/native_derived_table.view.lkml/views" ] } }, @@ -1779,7 +1859,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),country)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD),country)" ], "confidenceScore": 1.0 }, @@ -1790,7 +1870,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),city)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD),city)" ], "confidenceScore": 1.0 } @@ -1919,12 +1999,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1936,12 +2017,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1953,12 +2035,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1970,6 +2053,9 @@ { "id": "lkml_samples" }, + { + "id": "native_derived_table.view.lkml" + }, { "id": "views" } @@ -1978,18 +2064,19 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.flights.view.flights,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/prod/looker/lkml_samples/views" + "/prod/looker/lkml_samples/flights.view.lkml/views" ] } }, @@ -2018,7 +2105,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.flights.view.flights,PROD),id)" ], "confidenceScore": 1.0 } @@ -2086,12 +2173,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.flights.view.flights,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2103,12 +2191,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.flights.view.flights,PROD)", "changeType": 
"UPSERT", "aspectName": "viewProperties", "aspect": { @@ -2120,12 +2209,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.flights.view.flights,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -2137,6 +2227,9 @@ { "id": "lkml_samples" }, + { + "id": "flights.view.lkml" + }, { "id": "views" } @@ -2145,7 +2238,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2160,7 +2254,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2175,7 +2270,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2190,7 +2286,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test" + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index 85eb4dcd92ec9b..21a0b19849d975 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -99,6 +99,15 @@ def test_lookml_refinement_ingest(pytestconfig, tmp_path, mock_time): f"{tmp_path}/{mce_out_file}", f"{test_resources_dir}/lkml_samples" ) new_recipe["source"]["config"]["process_refinements"] = True + + new_recipe["source"]["config"][ + "view_naming_pattern" + ] = "{project}.{file_path}.view.{name}" + + new_recipe["source"]["config"][ + "view_browse_pattern" + ] = "/{env}/{platform}/{project}/{file_path}/views" + pipeline = Pipeline.create(new_recipe) pipeline.run() pipeline.pretty_print_summary() diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 67a563baa561cd..a495d04c4e398c 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -1752,6 +1752,18 @@ "recursive": false, "isPartOfKey": true }, + { + "fieldPath": "SomeId", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "UNIQUEIDENTIFIER()", + "recursive": false, + "isPartOfKey": false + }, { "fieldPath": "Name", "nullable": true, diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index ef6033dd919435..8277ff8bf7e89a 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -1752,6 +1752,18 @@ "recursive": false, "isPartOfKey": true }, + { + "fieldPath": "SomeId", + "nullable": false, + "type": 
{ + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "UNIQUEIDENTIFIER()", + "recursive": false, + "isPartOfKey": false + }, { "fieldPath": "Name", "nullable": true, diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 8098accebb424c..f3714bba6364d0 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -1752,6 +1752,18 @@ "recursive": false, "isPartOfKey": true }, + { + "fieldPath": "SomeId", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "UNIQUEIDENTIFIER()", + "recursive": false, + "isPartOfKey": false + }, { "fieldPath": "Name", "nullable": true, diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index d32002fb5648cc..d25d23daae2eac 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -1752,6 +1752,18 @@ "recursive": false, "isPartOfKey": true }, + { + "fieldPath": "SomeId", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "UNIQUEIDENTIFIER()", + "recursive": false, + "isPartOfKey": false + }, { "fieldPath": "Name", "nullable": true, diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index 2ff46e249007a6..c1347a7c8cacaf 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -34,7 +34,8 @@ CREATE TABLE Foo.Persons ( GO CREATE TABLE Foo.SalesReason ( - TempID int NOT NULL, + TempID int NOT NULL, + SomeId UNIQUEIDENTIFIER NOT NULL DEFAULT NEWID(), Name nvarchar(50) , CONSTRAINT PK_TempSales PRIMARY KEY NONCLUSTERED (TempID) , CONSTRAINT FK_TempSales_SalesReason FOREIGN KEY (TempID) @@ -49,20 +50,20 @@ AS SELECT @ID AS ThatDB; GO -GO -EXEC sys.sp_addextendedproperty -@name = N'MS_Description', -@value = N'Description for table Items of schema Foo.', -@level0type = N'SCHEMA', @level0name = 'Foo', -@level1type = N'TABLE', @level1name = 'Items'; +GO +EXEC sys.sp_addextendedproperty +@name = N'MS_Description', +@value = N'Description for table Items of schema Foo.', +@level0type = N'SCHEMA', @level0name = 'Foo', +@level1type = N'TABLE', @level1name = 'Items'; GO -GO -EXEC sys.sp_addextendedproperty -@name = N'MS_Description', -@value = N'Description for column LastName of table Persons of schema Foo.', -@level0type = N'SCHEMA', @level0name = 'Foo', -@level1type = N'TABLE', @level1name = 'Persons', +GO +EXEC sys.sp_addextendedproperty +@name = N'MS_Description', +@value = N'Description for column LastName of table Persons of schema Foo.', +@level0type = N'SCHEMA', @level0name = 'Foo', +@level1type = N'TABLE', @level1name = 'Persons', @level2type = N'COLUMN',@level2name = 'LastName'; GO USE msdb ; diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java index 4bbff3915aca93..b3e05d966e36b7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java @@ -121,8 +121,7 @@ private static Map getMappingsForField(@Nonnull final Searchable String analyzerName = entry.getValue(); subFields.put(fieldName, ImmutableMap.of( TYPE, TEXT, - ANALYZER, analyzerName, - SEARCH_ANALYZER, analyzerName + ANALYZER, analyzerName )); } } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java index 2be719ed263ea5..06545ef3525dd6 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java @@ -200,10 +200,19 @@ private void handleSourceDatasetEvent(MetadataChangeLog event, DatasetUrn source UpstreamLineage upstreamLineage = getUpstreamLineageFromEvent(event); if (upstreamLineage != null && upstreamLineage.hasUpstreams()) { UpstreamArray upstreams = upstreamLineage.getUpstreams(); - if ( - upstreams.size() == 1 - && upstreams.get(0).getDataset().getPlatformEntity().getPlatformNameEntity().equals(DBT_PLATFORM_NAME)) { - setSiblingsAndSoftDeleteSibling(upstreams.get(0).getDataset(), sourceUrn); + + // an entity can have merged lineage (eg. dbt + snowflake), but by default siblings are only between dbt <> non-dbt + UpstreamArray dbtUpstreams = new UpstreamArray( + upstreams.stream() + .filter(obj -> obj.getDataset().getPlatformEntity().getPlatformNameEntity().equals(DBT_PLATFORM_NAME)) + .collect(Collectors.toList()) + ); + // We're assuming a data asset (eg. snowflake table) will only ever be downstream of 1 dbt model + if (dbtUpstreams.size() == 1) { + setSiblingsAndSoftDeleteSibling(dbtUpstreams.get(0).getDataset(), sourceUrn); + } else { + log.error("{} has an unexpected number of dbt upstreams: {}. 
Not adding any as siblings.", sourceUrn.toString(), dbtUpstreams.size()); + } } } @@ -219,7 +228,7 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { existingDbtSiblingAspect != null && existingSourceSiblingAspect != null && existingDbtSiblingAspect.getSiblings().contains(sourceUrn.toString()) - && existingDbtSiblingAspect.getSiblings().contains(dbtUrn.toString()) + && existingSourceSiblingAspect.getSiblings().contains(dbtUrn.toString()) ) { // we have already connected them- we can abort here return; diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java index 5fb2cfaaef2d11..78d304d67bfc09 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java @@ -36,6 +36,8 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import java.net.URISyntaxException; + import static com.linkedin.metadata.Constants.*; import static org.mockito.ArgumentMatchers.*; @@ -78,15 +80,12 @@ public void testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { _mockAuthentication )).thenReturn(mockResponse); - MetadataChangeLog event = new MetadataChangeLog(); - event.setEntityType(DATASET_ENTITY_NAME); - event.setAspectName(UPSTREAM_LINEAGE_ASPECT_NAME); - event.setChangeType(ChangeType.UPSERT); + + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); + + Upstream upstream = createUpstream("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)", DatasetLineageType.TRANSFORMED); final UpstreamLineage upstreamLineage = new UpstreamLineage(); final UpstreamArray upstreamArray = new UpstreamArray(); - final Upstream upstream = new Upstream(); - upstream.setType(DatasetLineageType.TRANSFORMED); - upstream.setDataset(DatasetUrn.createFromString("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)")); upstreamArray.add(upstream); upstreamLineage.setUpstreams(upstreamArray); @@ -151,15 +150,11 @@ public void testInvokeWhenThereIsNoPairWithDbtModel() throws Exception { _mockAuthentication )).thenReturn(mockResponse); - MetadataChangeLog event = new MetadataChangeLog(); - event.setEntityType(DATASET_ENTITY_NAME); - event.setAspectName(UPSTREAM_LINEAGE_ASPECT_NAME); - event.setChangeType(ChangeType.UPSERT); + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); + Upstream upstream = createUpstream("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)", DatasetLineageType.TRANSFORMED); + final UpstreamLineage upstreamLineage = new UpstreamLineage(); final UpstreamArray upstreamArray = new UpstreamArray(); - final Upstream upstream = new Upstream(); - upstream.setType(DatasetLineageType.TRANSFORMED); - upstream.setDataset(DatasetUrn.createFromString("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)")); upstreamArray.add(upstream); upstreamLineage.setUpstreams(upstreamArray); @@ -189,15 +184,11 @@ public void testInvokeWhenThereIsNoPairWithDbtModel() throws Exception { public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Exception { 
Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); - MetadataChangeLog event = new MetadataChangeLog(); - event.setEntityType(DATASET_ENTITY_NAME); - event.setAspectName(UPSTREAM_LINEAGE_ASPECT_NAME); - event.setChangeType(ChangeType.UPSERT); + + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); final UpstreamLineage upstreamLineage = new UpstreamLineage(); final UpstreamArray upstreamArray = new UpstreamArray(); - final Upstream upstream = new Upstream(); - upstream.setType(DatasetLineageType.TRANSFORMED); - upstream.setDataset(DatasetUrn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)")); + Upstream upstream = createUpstream("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)", DatasetLineageType.TRANSFORMED); upstreamArray.add(upstream); upstreamLineage.setUpstreams(upstreamArray); @@ -259,10 +250,7 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { .setSkipAggregates(true).setSkipHighlighting(true)) )).thenReturn(returnSearchResult); - MetadataChangeLog event = new MetadataChangeLog(); - event.setEntityType(DATASET_ENTITY_NAME); - event.setAspectName(DATASET_KEY_ASPECT_NAME); - event.setChangeType(ChangeType.UPSERT); + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, DATASET_KEY_ASPECT_NAME, ChangeType.UPSERT); final DatasetKey datasetKey = new DatasetKey(); datasetKey.setName("my-proj.jaffle_shop.customers"); datasetKey.setOrigin(FabricType.PROD); @@ -304,4 +292,76 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { Mockito.eq(_mockAuthentication) ); } -} + @Test + public void testInvokeWhenSourceUrnHasTwoDbtUpstreams() throws Exception { + + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); + final UpstreamLineage upstreamLineage = new UpstreamLineage(); + final UpstreamArray upstreamArray = new UpstreamArray(); + Upstream dbtUpstream1 = createUpstream("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.source_entity1,PROD)", DatasetLineageType.TRANSFORMED); + Upstream dbtUpstream2 = createUpstream("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.source_entity2,PROD)", DatasetLineageType.TRANSFORMED); + upstreamArray.add(dbtUpstream1); + upstreamArray.add(dbtUpstream2); + upstreamLineage.setUpstreams(upstreamArray); + + event.setAspect(GenericRecordUtils.serializeAspect(upstreamLineage)); + event.setEntityUrn(Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)")); + _siblingAssociationHook.invoke(event); + + + Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal( + Mockito.any(), + Mockito.eq(_mockAuthentication) + ); + + + } + + @Test + public void testInvokeWhenSourceUrnHasTwoUpstreamsOneDbt() throws Exception { + + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); + final UpstreamLineage upstreamLineage = new UpstreamLineage(); + final UpstreamArray upstreamArray = new UpstreamArray(); + Upstream dbtUpstream = createUpstream("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.source_entity1,PROD)", DatasetLineageType.TRANSFORMED); + Upstream snowflakeUpstream = + createUpstream("urn:li:dataset:(urn:li:dataPlatform:snowflake,my-proj.jaffle_shop.customers,PROD)", DatasetLineageType.TRANSFORMED); + upstreamArray.add(dbtUpstream); + upstreamArray.add(snowflakeUpstream); + 
upstreamLineage.setUpstreams(upstreamArray); + + event.setAspect(GenericRecordUtils.serializeAspect(upstreamLineage)); + event.setEntityUrn(Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)")); + _siblingAssociationHook.invoke(event); + + + Mockito.verify(_mockEntityClient, Mockito.times(2)).ingestProposal( + Mockito.any(), + Mockito.eq(_mockAuthentication) + ); + + + } + + private MetadataChangeLog createEvent(String entityType, String aspectName, ChangeType changeType) { + MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(entityType); + event.setAspectName(aspectName); + event.setChangeType(changeType); + return event; + } + private Upstream createUpstream(String urn, DatasetLineageType upstreamType) { + + final Upstream upstream = new Upstream(); + upstream.setType(upstreamType); + try { + upstream.setDataset(DatasetUrn.createFromString(urn)); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + + return upstream; + } + + + }
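
For reference, the "{file_path}" placeholder exercised by the LookML golden-file and test changes above can be used in an ordinary ingestion recipe, not just in the test harness. The snippet below is a minimal sketch and is not part of this patch: the base_folder, connection mapping, and output filename are hypothetical placeholders, and only the two pattern strings are taken from the recipe overrides in test_lookml.py in this diff.

# Sketch of a LookML ingestion recipe using the file-path-aware patterns
# from this PR. All paths and connection values below are placeholders.
from datahub.ingestion.run.pipeline import Pipeline

recipe = {
    "source": {
        "type": "lookml",
        "config": {
            # Hypothetical local checkout of the LookML project.
            "base_folder": "/path/to/lkml_samples",
            # Hypothetical connection-to-platform mapping; adjust to your warehouse.
            "connection_to_platform_map": {
                "my_connection": {
                    "platform": "snowflake",
                    "default_db": "default_db",
                    "default_schema": "default_schema",
                }
            },
            "project_name": "lkml_samples",
            # Same patterns set on the test recipe in this diff: include the
            # view's source file path in both the dataset URN and browse path.
            "view_naming_pattern": "{project}.{file_path}.view.{name}",
            "view_browse_pattern": "/{env}/{platform}/{project}/{file_path}/views",
        },
    },
    "sink": {
        "type": "file",
        # Hypothetical output location for the emitted MCEs.
        "config": {"filename": "/tmp/lookml_mces.json"},
    },
}

pipeline = Pipeline.create(recipe)
pipeline.run()
pipeline.raise_from_status()

Run this way, views are emitted with URNs such as urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD) and browse paths that include the owning .view.lkml file, matching the updated golden files above.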